List of usage examples for org.apache.hadoop.fs.Path.toUri()
public URI toUri()
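As a quick orientation before the examples below, here is a minimal, self-contained sketch of what toUri() exposes; the paths and class name are hypothetical:

import java.net.URI;
import org.apache.hadoop.fs.Path;

public class PathToUriDemo {
    public static void main(String[] args) {
        // A fully qualified path carries an explicit scheme and authority.
        URI qualified = new Path("hdfs://namenode:8020/data/input").toUri();
        System.out.println(qualified.getScheme());    // "hdfs"
        System.out.println(qualified.getAuthority()); // "namenode:8020"
        System.out.println(qualified.getPath());      // "/data/input"

        // A bare path has no scheme; getScheme() returns null, so callers
        // fall back to the default filesystem from the Configuration.
        URI bare = new Path("/data/input").toUri();
        System.out.println(bare.getScheme()); // null
    }
}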
From source file:com.moz.fiji.schema.impl.hbase.HBaseFijiTable.java
License:Apache License
/**
 * Loads partitioned HFiles directly into the regions of this Fiji table.
 *
 * @param hfilePath Path of the HFiles to load.
 * @throws IOException on I/O error.
 */
public void bulkLoad(Path hfilePath) throws IOException {
    final LoadIncrementalHFiles loader = createHFileLoader(mConf);

    final String hFileScheme = hfilePath.toUri().getScheme();
    Token<DelegationTokenIdentifier> hdfsDelegationToken = null;

    // If we're bulk loading from a secure HDFS, we should request and forward a delegation token.
    // LoadIncrementalHfiles will actually do this if none is provided, but because we call it
    // repeatedly in a short amount of time, this seems to trigger a possible race condition
    // where we ask to load the next HFile while there is a pending token cancellation request.
    // By requesting the token ourselves, it is re-used for each bulk load call.
    // Once we're done with the bulk loader we cancel the token.
    if (UserGroupInformation.isSecurityEnabled() && hFileScheme.equals(HDFS_SCHEME)) {
        final UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
        final DistributedFileSystem fileSystem =
            (DistributedFileSystem) hfilePath.getFileSystem(mConf);
        hdfsDelegationToken = fileSystem.getDelegationToken(RENEWER);
        ugi.addToken(hdfsDelegationToken);
    }

    try {
        // LoadIncrementalHFiles.doBulkLoad() requires an HTable instance, not an HTableInterface:
        final HTable htable = (HTable) mHTableFactory.create(mConf, mHBaseTableName);
        try {
            final List<Path> hfilePaths = Lists.newArrayList();

            // Try to find any HFiles for partitions within the passed-in path.
            final FileStatus[] hfiles =
                hfilePath.getFileSystem(mConf).globStatus(new Path(hfilePath, "*"));
            for (FileStatus hfile : hfiles) {
                String partName = hfile.getPath().getName();
                if (!partName.startsWith("_") && partName.endsWith(".hfile")) {
                    Path partHFile = new Path(hfilePath, partName);
                    hfilePaths.add(partHFile);
                }
            }
            if (hfilePaths.isEmpty()) {
                // If we didn't find any parts, add in the passed-in parameter.
                hfilePaths.add(hfilePath);
            }
            for (Path path : hfilePaths) {
                loader.doBulkLoad(path, htable);
                LOG.info("Successfully loaded: " + path.toString());
            }
        } finally {
            htable.close();
        }
    } catch (TableNotFoundException tnfe) {
        throw new InternalFijiError(tnfe);
    }

    // Cancel the HDFS delegation token if we requested one.
    if (null != hdfsDelegationToken) {
        try {
            hdfsDelegationToken.cancel(mConf);
        } catch (InterruptedException e) {
            LOG.warn("Failed to cancel HDFS delegation token.", e);
        }
    }
}
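The scheme check above means the delegation-token branch only fires for fully qualified HDFS paths. A hypothetical caller on a secured cluster (the variable name and path are made up for illustration):

// Fully qualified, so hfilePath.toUri().getScheme() returns "hdfs" and a
// delegation token is requested up front before the repeated bulk loads.
table.bulkLoad(new Path("hdfs://namenode:8020/tmp/fiji-hfiles"));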
From source file:com.moz.fiji.schema.tools.CreateTableTool.java
License:Apache License
/**
 * Determines whether a path has its filesystem explicitly specified. Did it start
 * with "hdfs://" or "file://"?
 *
 * @param path The path to check.
 * @return Whether a file system was explicitly specified in the path.
 */
private static boolean fileSystemSpecified(Path path) {
    return null != path.toUri().getScheme();
}
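Because Path.toUri() preserves the scheme verbatim, or leaves it null for bare paths, the check behaves as follows; the paths below are made-up illustrations:

fileSystemSpecified(new Path("hdfs://namenode/fiji/tables")); // true  -- scheme is "hdfs"
fileSystemSpecified(new Path("file:///tmp/layout.json"));     // true  -- scheme is "file"
fileSystemSpecified(new Path("/tmp/layout.json"));            // false -- no scheme, default fs applies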
From source file:com.moz.fiji.schema.tools.LayoutTool.java
License:Apache License
/**
 * Determines whether a path has its filesystem explicitly specified. Did it start
 * with "hdfs://" or "file://"?
 *
 * @param path The path to check.
 * @return Whether a file system was explicitly specified in the path.
 */
protected static boolean fileSystemSpecified(Path path) {
    return null != path.toUri().getScheme();
}
From source file:com.mozilla.grouperfish.pig.eval.ml.TFIDFVectorizer.java
License:Apache License
private void loadFeatureIndex(String featureIndexPath) throws IOException {
    if (featureIndex == null) {
        featureIndex = new HashMap<String, Integer>();
        Path p = new Path(featureIndexPath);
        // Resolve the filesystem (HDFS, local, ...) from the path's URI scheme.
        FileSystem fs = FileSystem.get(p.toUri(), new Configuration());
        int index = 0;
        for (FileStatus status : fs.listStatus(p)) {
            if (!status.isDir()) {
                BufferedReader reader = null;
                try {
                    reader = new BufferedReader(new InputStreamReader(fs.open(status.getPath())));
                    // One feature per line; indices are assigned in encounter order.
                    String line = null;
                    while ((line = reader.readLine()) != null) {
                        featureIndex.put(line.trim(), index++);
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }
        log.info("Loaded feature index with size: " + featureIndex.size());
    }
}
From source file:com.mozilla.grouperfish.text.Dictionary.java
License:Apache License
public static Set<String> loadDictionary(Path dictionaryPath) throws IOException {
    Set<String> dictionary = null;
    FileSystem fs = null;
    try {
        // Resolve the filesystem from the path's URI scheme, then delegate.
        fs = FileSystem.get(dictionaryPath.toUri(), new Configuration());
        dictionary = loadDictionary(fs, dictionaryPath);
    } finally {
        if (fs != null) {
            fs.close();
        }
    }
    return dictionary;
}
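Because the filesystem is resolved from the path's own URI, the same helper works against any configured scheme. A hypothetical caller (both paths are made up):

// Backed by HDFS when the path carries an "hdfs" scheme...
Set<String> fromHdfs = Dictionary.loadDictionary(new Path("hdfs://namenode:8020/dicts/en.txt"));
// ...and by the local filesystem for a "file" scheme.
Set<String> fromLocal = Dictionary.loadDictionary(new Path("file:///opt/dicts/en.txt"));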
From source file:com.mozilla.grouperfish.text.Dictionary.java
License:Apache License
public static Map<String, Integer> loadFeatureIndex(Path dictionaryPath) throws IOException {
    Map<String, Integer> featureIndex = null;
    FileSystem fs = null;
    try {
        fs = FileSystem.get(dictionaryPath.toUri(), new Configuration());
        featureIndex = loadFeatureIndex(fs, dictionaryPath);
    } finally {
        if (fs != null) {
            fs.close();
        }
    }
    return featureIndex;
}
From source file:com.mozilla.grouperfish.text.Dictionary.java
License:Apache License
public static Map<Integer, String> loadInvertedFeatureIndex(Path dictionaryPath) throws IOException {
    Map<Integer, String> featureIndex = null;
    FileSystem fs = null;
    try {
        fs = FileSystem.get(dictionaryPath.toUri(), new Configuration());
        featureIndex = loadInvertedFeatureIndex(fs, dictionaryPath);
    } finally {
        if (fs != null) {
            fs.close();
        }
    }
    return featureIndex;
}
From source file:com.mozilla.grouperfish.transforms.coclustering.pig.eval.text.ConvertDocumentIDToID.java
License:Apache License
private void loadDocumentIndex(String documentIndexPath) throws IOException {
    if (documentIndex == null) {
        documentIndex = new HashMap<String, Integer>();
        Path p = new Path(documentIndexPath);
        FileSystem fs = FileSystem.get(p.toUri(), new Configuration());
        int index = 0;
        for (FileStatus status : fs.listStatus(p)) {
            Path currPath = status.getPath();
            // Skip directories and Hadoop bookkeeping files such as "_SUCCESS".
            if (!status.isDir() && !currPath.getName().startsWith("_")) {
                BufferedReader reader = null;
                try {
                    reader = new BufferedReader(new InputStreamReader(fs.open(currPath)));
                    String line = null;
                    while ((line = reader.readLine()) != null) {
                        documentIndex.put(line.trim(), index++);
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }
        log.info("Loaded document index with size: " + documentIndex.size());
    }
}
From source file:com.mozilla.grouperfish.transforms.coclustering.pig.eval.text.ConvertFeatureToID.java
License:Apache License
private void loadFeatureIndex(String featureIndexPath) throws IOException {
    if (featureIndex == null) {
        featureIndex = new HashMap<String, Integer>();
        Path p = new Path(featureIndexPath);
        FileSystem fs = FileSystem.get(p.toUri(), new Configuration());
        int index = 0;
        for (FileStatus status : fs.listStatus(p)) {
            Path currPath = status.getPath();
            if (!status.isDir() && !currPath.getName().startsWith("_")) {
                BufferedReader reader = null;
                try {
                    reader = new BufferedReader(new InputStreamReader(fs.open(currPath)));
                    String line = null;
                    while ((line = reader.readLine()) != null) {
                        featureIndex.put(line.trim(), index++);
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }
        log.info("Loaded feature index with size: " + featureIndex.size());
    }
}
From source file:com.mozilla.grouperfish.transforms.coclustering.pig.eval.text.UnigramExtractor.java
License:Apache License
private void loadStopwordDict() throws IOException {
    if (stopwordDictPath != null) {
        stopwords = new HashSet<String>();
        FileSystem hdfs = null;
        Path p = new Path(stopwordDictPath);
        hdfs = FileSystem.get(p.toUri(), new Configuration());
        for (FileStatus status : hdfs.listStatus(p)) {
            if (!status.isDir()) {
                BufferedReader reader = null;
                try {
                    reader = new BufferedReader(new InputStreamReader(hdfs.open(status.getPath())));
                    String line = null;
                    while ((line = reader.readLine()) != null) {
                        stopwords.add(line.trim());
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }
        log.info("Loaded stopword dictionary with size: " + stopwords.size());
    }
}