Example usage for org.apache.hadoop.fs Path toUri

Introduction

This page collects usage examples of org.apache.hadoop.fs.Path#toUri, drawn from the source files listed below.

Prototype

public URI toUri() 

Document

Convert this Path to a URI.
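
As a quick illustrative sketch (not taken from any of the projects below): the URI returned by toUri() exposes the scheme and path components that the usage examples inspect, and a relative Path yields a null scheme.

import java.net.URI;
import org.apache.hadoop.fs.Path;

public class PathToUriDemo {
    public static void main(String[] args) {
        URI qualified = new Path("hdfs://namenode:8020/data/input").toUri();
        System.out.println(qualified.getScheme()); // "hdfs"
        System.out.println(qualified.getPath());   // "/data/input"

        URI relative = new Path("data/input").toUri();
        System.out.println(relative.getScheme());  // null: no filesystem specified
    }
}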

Usage

From source file:com.moz.fiji.schema.impl.hbase.HBaseFijiTable.java

License:Apache License

/**
 * Loads partitioned HFiles directly into the regions of this Fiji table.
 *
 * @param hfilePath Path of the HFiles to load.
 * @throws IOException on I/O error.
 */
public void bulkLoad(Path hfilePath) throws IOException {
    final LoadIncrementalHFiles loader = createHFileLoader(mConf);

    final String hFileScheme = hfilePath.toUri().getScheme();
    Token<DelegationTokenIdentifier> hdfsDelegationToken = null;

    // If we're bulk loading from a secure HDFS, we should request and forward a delegation token.
    // LoadIncrementalHFiles will do this itself if none is provided, but because we call it
    // repeatedly in a short amount of time, this seems to trigger a possible race condition
    // where we ask to load the next HFile while there is a pending token cancellation request.
    // By requesting the token ourselves, it is re-used for each bulk load call.
    // Once we're done with the bulk loader we cancel the token.
    if (UserGroupInformation.isSecurityEnabled() && HDFS_SCHEME.equals(hFileScheme)) {
        final UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
        final DistributedFileSystem fileSystem = (DistributedFileSystem) hfilePath.getFileSystem(mConf);
        hdfsDelegationToken = fileSystem.getDelegationToken(RENEWER);
        ugi.addToken(hdfsDelegationToken);
    }

    try {
        // LoadIncrementalHFiles.doBulkLoad() requires an HTable instance, not an HTableInterface:
        final HTable htable = (HTable) mHTableFactory.create(mConf, mHBaseTableName);
        try {
            final List<Path> hfilePaths = Lists.newArrayList();

            // Look for partition HFiles under the passed-in path.
            final FileStatus[] hfiles = hfilePath.getFileSystem(mConf).globStatus(new Path(hfilePath, "*"));
            for (FileStatus hfile : hfiles) {
                String partName = hfile.getPath().getName();
                if (!partName.startsWith("_") && partName.endsWith(".hfile")) {
                    Path partHFile = new Path(hfilePath, partName);
                    hfilePaths.add(partHFile);
                }
            }
            if (hfilePaths.isEmpty()) {
                // If no partition HFiles were found, fall back to the passed-in path itself.
                hfilePaths.add(hfilePath);
            }
            for (Path path : hfilePaths) {
                loader.doBulkLoad(path, htable);
                LOG.info("Successfully loaded: " + path.toString());
            }
        } finally {
            htable.close();
        }
    } catch (TableNotFoundException tnfe) {
        throw new InternalFijiError(tnfe);
    }

    // Cancel the HDFS delegation token if we requested one.
    if (null != hdfsDelegationToken) {
        try {
            hdfsDelegationToken.cancel(mConf);
        } catch (InterruptedException e) {
            LOG.warn("Failed to cancel HDFS delegation token.", e);
        }
    }
}
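
Note that toUri().getScheme() returns null for a relative path, so scheme comparisons are safest with the constant on the left, or with the path fully qualified first. A minimal sketch (class and variable names are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class QualifiedSchemeDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path raw = new Path("hfiles/output");      // relative: scheme is null
        FileSystem fs = raw.getFileSystem(conf);
        Path qualified = fs.makeQualified(raw);    // e.g. file:/home/user/hfiles/output
        System.out.println(qualified.toUri().getScheme()); // "file", or "hdfs" on a cluster
    }
}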

From source file:com.moz.fiji.schema.tools.CreateTableTool.java

License:Apache License

/**
 * Determines whether a path has its filesystem explicitly specified.  Did it start
 * with "hdfs://" or "file://"?//ww w. jav  a2  s  .c o  m
 *
 * @param path The path to check.
 * @return Whether a file system was explicitly specified in the path.
 */
private static boolean fileSystemSpecified(Path path) {
    return null != path.toUri().getScheme();
}
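
For illustration, the check behaves as follows (assuming only hadoop-common on the classpath):

import org.apache.hadoop.fs.Path;

public class FileSystemSpecifiedDemo {
    public static void main(String[] args) {
        // A non-null scheme means the filesystem was spelled out in the path string.
        System.out.println(null != new Path("hdfs://namenode/layout.json").toUri().getScheme()); // true
        System.out.println(null != new Path("file:///tmp/layout.json").toUri().getScheme());     // true
        System.out.println(null != new Path("/tmp/layout.json").toUri().getScheme());            // false
    }
}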

From source file:com.moz.fiji.schema.tools.LayoutTool.java

License:Apache License

/**
 * Determines whether a path has its filesystem explicitly specified.  Did it start
 * with "hdfs://" or "file://"?//  w w  w . j  av a 2 s.c  om
 *
 * @param path The path to check.
 * @return Whether a file system was explicitly specified in the path.
 */
protected static boolean fileSystemSpecified(Path path) {
    return null != path.toUri().getScheme();
}

From source file:com.mozilla.grouperfish.pig.eval.ml.TFIDFVectorizer.java

License:Apache License

private void loadFeatureIndex(String featureIndexPath) throws IOException {
    if (featureIndex == null) {
        featureIndex = new HashMap<String, Integer>();

        Path p = new Path(featureIndexPath);
        FileSystem fs = FileSystem.get(p.toUri(), new Configuration());
        int index = 0;
        for (FileStatus status : fs.listStatus(p)) {
            if (!status.isDirectory()) {
                BufferedReader reader = null;
                try {
                    reader = new BufferedReader(new InputStreamReader(fs.open(status.getPath())));
                    String line = null;
                    while ((line = reader.readLine()) != null) {
                        featureIndex.put(line.trim(), index++);
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }

        log.info("Loaded feature index with size: " + featureIndex.size());
    }
}
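
The same read pattern can be written more compactly with try-with-resources, which closes the reader even if readLine() throws. A hedged sketch using the same Hadoop calls (class and method names are illustrative):

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FeatureIndexLoader {
    public static Map<String, Integer> load(String featureIndexPath) throws IOException {
        Map<String, Integer> featureIndex = new HashMap<>();
        Path p = new Path(featureIndexPath);
        FileSystem fs = FileSystem.get(p.toUri(), new Configuration());
        int index = 0;
        for (FileStatus status : fs.listStatus(p)) {
            if (status.isDirectory()) {
                continue;
            }
            // try-with-resources closes the reader on both normal and exceptional exit.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(fs.open(status.getPath())))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    featureIndex.put(line.trim(), index++);
                }
            }
        }
        return featureIndex;
    }
}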

From source file:com.mozilla.grouperfish.text.Dictionary.java

License:Apache License

public static Set<String> loadDictionary(Path dictionaryPath) throws IOException {
    Set<String> dictionary = null;
    FileSystem fs = null;
    try {
        fs = FileSystem.get(dictionaryPath.toUri(), new Configuration());
        dictionary = loadDictionary(fs, dictionaryPath);
    } finally {
        if (fs != null) {
            fs.close();
        }
    }

    return dictionary;
}
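
A design note on this pattern: FileSystem.get() returns a JVM-wide cached instance by default, so the fs.close() in the finally block also closes that shared instance for any other code using the same URI. When a private, safely closeable instance is wanted, FileSystem.newInstance() avoids the cache; a hedged sketch (wrapper class name is illustrative):

import java.io.IOException;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DictionaryLoaderSketch {
    public static Set<String> load(Path dictionaryPath) throws IOException {
        // newInstance() bypasses the FileSystem cache, so closing it here
        // cannot disturb other users of the same filesystem URI.
        try (FileSystem fs = FileSystem.newInstance(dictionaryPath.toUri(), new Configuration())) {
            return Dictionary.loadDictionary(fs, dictionaryPath); // the two-argument overload above
        }
    }
}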

From source file:com.mozilla.grouperfish.text.Dictionary.java

License:Apache License

public static Map<String, Integer> loadFeatureIndex(Path dictionaryPath) throws IOException {
    Map<String, Integer> featureIndex = null;
    FileSystem fs = null;
    try {
        fs = FileSystem.get(dictionaryPath.toUri(), new Configuration());
        featureIndex = loadFeatureIndex(fs, dictionaryPath);
    } finally {
        if (fs != null) {
            fs.close();
        }
    }

    return featureIndex;
}

From source file:com.mozilla.grouperfish.text.Dictionary.java

License:Apache License

public static Map<Integer, String> loadInvertedFeatureIndex(Path dictionaryPath) throws IOException {
    Map<Integer, String> featureIndex = null;
    FileSystem fs = null;
    try {
        fs = FileSystem.get(dictionaryPath.toUri(), new Configuration());
        featureIndex = loadInvertedFeatureIndex(fs, dictionaryPath);
    } finally {
        if (fs != null) {
            fs.close();
        }
    }

    return featureIndex;
}

From source file:com.mozilla.grouperfish.transforms.coclustering.pig.eval.text.ConvertDocumentIDToID.java

License:Apache License

private void loadDocumentIndex(String documentIndexPath) throws IOException {
    if (documentIndex == null) {
        documentIndex = new HashMap<String, Integer>();

        Path p = new Path(documentIndexPath);
        FileSystem fs = FileSystem.get(p.toUri(), new Configuration());
        int index = 0;
        for (FileStatus status : fs.listStatus(p)) {
            Path currPath = status.getPath();
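            // Skip subdirectories and Hadoop bookkeeping outputs such as _SUCCESS or _logs.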
            if (!status.isDirectory() && !currPath.getName().startsWith("_")) {
                BufferedReader reader = null;
                try {
                    reader = new BufferedReader(new InputStreamReader(fs.open(currPath)));
                    String line = null;
                    while ((line = reader.readLine()) != null) {
                        documentIndex.put(line.trim(), index++);
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }

        log.info("Loaded document index with size: " + documentIndex.size());
    }
}

From source file:com.mozilla.grouperfish.transforms.coclustering.pig.eval.text.ConvertFeatureToID.java

License:Apache License

private void loadFeatureIndex(String featureIndexPath) throws IOException {
    if (featureIndex == null) {
        featureIndex = new HashMap<String, Integer>();

        Path p = new Path(featureIndexPath);
        FileSystem fs = FileSystem.get(p.toUri(), new Configuration());
        int index = 0;
        for (FileStatus status : fs.listStatus(p)) {
            Path currPath = status.getPath();
            if (!status.isDirectory() && !currPath.getName().startsWith("_")) {
                BufferedReader reader = null;
                try {
                    reader = new BufferedReader(new InputStreamReader(fs.open(currPath)));
                    String line = null;
                    while ((line = reader.readLine()) != null) {
                        featureIndex.put(line.trim(), index++);
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }

        log.info("Loaded feature index with size: " + featureIndex.size());
    }
}

From source file:com.mozilla.grouperfish.transforms.coclustering.pig.eval.text.UnigramExtractor.java

License:Apache License

private void loadStopwordDict() throws IOException {
    if (stopwordDictPath != null) {
        stopwords = new HashSet<String>();

        Path p = new Path(stopwordDictPath);
        FileSystem hdfs = FileSystem.get(p.toUri(), new Configuration());
        for (FileStatus status : hdfs.listStatus(p)) {
            if (!status.isDirectory()) {
                BufferedReader reader = null;
                try {
                    reader = new BufferedReader(new InputStreamReader(hdfs.open(status.getPath())));
                    String line = null;
                    while ((line = reader.readLine()) != null) {
                        stopwords.add(line.trim());
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }

        log.info("Loaded stopword dictionary with size: " + stopwords.size());
    }
}