List of usage examples for org.apache.hadoop.fs Path toString
@Override
public String toString()
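Before the project examples below, a minimal sketch of what Path.toString() returns (the sample paths and the demo class are illustrative, not taken from any project on this page):

import org.apache.hadoop.fs.Path;

public class PathToStringDemo {
    public static void main(String[] args) {
        // A plain path prints back unchanged
        Path local = new Path("/tmp/data/input.txt");
        System.out.println(local.toString()); // /tmp/data/input.txt

        // A fully qualified path keeps its scheme and authority
        Path hdfs = new Path("hdfs://namenode:8020/user/alice/part-00000");
        System.out.println(hdfs.toString()); // hdfs://namenode:8020/user/alice/part-00000

        // Parent/child construction is normalized before printing
        Path child = new Path(new Path("/user/alice"), "logs");
        System.out.println(child.toString()); // /user/alice/logs
    }
}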
From source file:co.cask.hydrator.plugin.batch.source.ExcelReaderRegexFilter.java
License:Apache License
@Override
public boolean accept(Path path) {
    try {
        fs = FileSystem.get(path.toUri(), conf);
        if (fs.isDirectory(path)) {
            return true;
        }
        Matcher matcher = pattern.matcher(path.toString());
        boolean patternMatch = matcher.find();
        if (patternMatch && !conf.getBoolean(RE_PROCESS, false)
            && CollectionUtils.isNotEmpty(preProcessedFileList)) {
            patternMatch = !preProcessedFileList.contains(path.toString());
        }
        return patternMatch;
    } catch (IOException e) {
        return false;
    }
}
From source file:co.cask.hydrator.plugin.common.BatchFileFilter.java
License:Apache License
@Override
public boolean accept(Path path) {
    String filePathName = path.toString();
    // The path filter will first check the directory if a directory is given
    if (filePathName.equals(pathName) || filePathName.equals(pathName + "/")) {
        return true;
    }
    // filter by file name using regex from configuration
    if (!useTimeFilter) {
        String fileName = path.getName();
        Matcher matcher = regex.matcher(fileName);
        return matcher.matches();
    }
    // use hourly time filter
    if (lastRead.equals("-1")) {
        String currentTime = sdf.format(prevHour);
        return filePathName.contains(currentTime);
    }
    // use stateful time filter
    Date fileDate;
    String filename = path.getName();
    try {
        fileDate = sdf.parse(filename.substring(0, DATE_LENGTH));
    } catch (Exception pe) {
        // try to parse CloudFront format
        try {
            int startIndex = filename.indexOf(".") + 1;
            fileDate = sdf.parse(filename.substring(startIndex, startIndex + DATE_LENGTH));
        } catch (Exception e) {
            LOG.warn("Couldn't parse file: " + filename);
            return false;
        }
    }
    return isWithinRange(fileDate);
}
From source file:co.cask.hydrator.plugin.common.BatchXMLFileFilter.java
License:Apache License
@Override
public boolean accept(Path path) {
    String filePathName = path.toString();
    // The path filter will first check the directory if a directory is given
    if (filePathName.equals(pathName)) {
        return true;
    }
    Matcher matcher = regex.matcher(path.getName());
    boolean patternMatch = matcher.find();
    if (patternMatch && CollectionUtils.isNotEmpty(preProcessedFileList)) {
        patternMatch = !preProcessedFileList.contains(filePathName);
    }
    return patternMatch;
}
From source file:co.cask.tephra.persist.HDFSUtil.java
License:Apache License
/**
 * Call HDFS-4525 isFileClosed if it is available.
 * @param dfs Filesystem instance to use.
 * @param m Method instance to call.
 * @param p Path of the file to check is closed.
 * @return True if file is closed.
 */
private boolean isFileClosed(final DistributedFileSystem dfs, final Method m, final Path p) {
    try {
        return (Boolean) m.invoke(dfs, p);
    } catch (SecurityException e) {
        LOG.warn("No access", e);
    } catch (Exception e) {
        LOG.warn("Failed invocation for " + p.toString(), e);
    }
    return false;
}
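A hedged sketch of how the Method handle passed to this helper is typically obtained; the lookup below is an assumption for illustration, not code from HDFSUtil:

// isFileClosed(Path) was added by HDFS-4525, so it is looked up reflectively
// to stay compatible with older Hadoop client libraries.
Method isFileClosedMethod;
try {
    isFileClosedMethod = DistributedFileSystem.class.getMethod("isFileClosed", Path.class);
} catch (NoSuchMethodException e) {
    isFileClosedMethod = null; // method absent on this Hadoop version; skip the check
}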
From source file:co.cask.tigon.data.hbase.HBaseTestBase.java
License:Apache License
public Path createHBaseRootDir(Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    Path hbaseRootdir = new Path(fs.makeQualified(fs.getHomeDirectory()), "hbase");
    conf.set(HConstants.HBASE_DIR, hbaseRootdir.toString());
    fs.mkdirs(hbaseRootdir);
    FSUtils.setVersion(fs, hbaseRootdir);
    return hbaseRootdir;
}
From source file:co.nubetech.hiho.merge.MergeKeyMapper.java
License:Apache License
@Override
protected void setup(Mapper.Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    InputSplit is = context.getInputSplit();
    FileSplit fs = (FileSplit) is;
    Path splitPath = fs.getPath();
    if (splitPath.toString().contains(conf.get(HIHOConf.MERGE_OLD_PATH))) {
        isOld = true;
    } else if (splitPath.toString().contains(conf.get(HIHOConf.MERGE_NEW_PATH))) {
        isOld = false;
    } else {
        throw new IOException("File " + splitPath + " is not under new path "
            + conf.get(HIHOConf.MERGE_NEW_PATH) + " and old path "
            + conf.get(HIHOConf.MERGE_OLD_PATH));
    }
}
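A hedged usage sketch: the two HIHOConf properties consulted above would be set on the job configuration before submission (the property constants come from the snippet; the directory values are illustrative):

Configuration conf = new Configuration();
conf.set(HIHOConf.MERGE_OLD_PATH, "/data/merge/old");
conf.set(HIHOConf.MERGE_NEW_PATH, "/data/merge/new");
// every input split whose path contains one of these prefixes
// is then tagged as old or new in setup() above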
From source file:ColumnStorage.ColumnProject.java
License:Open Source License
public ColumnProject(Path path, Configuration conf) throws Exception {
    String name = path.toString() + ConstVar.Navigator;
    // note: naviPath is computed but unused here; the column info is loaded
    // from the head info of the data file itself
    Path naviPath = new Path(name);
    this.conf = conf;
    FileSystem fs = FileSystem.get(conf);
    loadColmnInfoFromHeadInfo(fs, path);
}
From source file:ColumnStorage.ColumnProject.java
License:Open Source License
void loadColmnInfoFromNavigator(FileSystem fs, Path naviPath) throws Exception {
    FSDataInputStream in = fs.open(naviPath);
    int magic = in.readInt();
    if (magic != ConstVar.NaviMagic) {
        throw new SEException.ErrorFileFormat("invalid navi magic:" + magic + ",file:" + naviPath.toString());
    }
    short infoNum = in.readShort();
    for (int i = 0; i < infoNum; i++) {
        infos.add(loadColumnInfo(in));
    }
}
From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java
License:Apache License
/**
 * Configure a MapReduce Job to perform an incremental load into the given
 * table. This
 * <ul>
 * <li>Inspects the table to configure a total order partitioner</li>
 * <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
 * <li>Sets the number of reduce tasks to match the current number of regions</li>
 * <li>Sets the output key/value class to match HFileOutputFormat's requirements</li>
 * <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
 * PutSortReducer)</li>
 * </ul>
 * The user should be sure to set the map output value class to either KeyValue or Put before
 * running this function.
 */
public static void configureIncrementalLoad(Job job, HTable table) throws IOException {
    Configuration conf = job.getConfiguration();
    Class<? extends Partitioner> topClass;
    try {
        topClass = getTotalOrderPartitionerClass();
    } catch (ClassNotFoundException e) {
        throw new IOException("Failed getting TotalOrderPartitioner", e);
    }
    // set the total order partitioner
    job.setPartitionerClass(topClass);
    // set the key class for the job output data
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    // set the value class for job outputs
    job.setOutputValueClass(KeyValue.class);
    // output format: HFile
    job.setOutputFormatClass(HFileOutputFormat2.class);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(SingleColumnReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    LOG.info("Looking up current regions for table " + table);
    // look up the start key of each region
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions "
        + "to match current region count");
    // one reduce task per region
    job.setNumReduceTasks(startKeys.size());

    Path partitionsPath = new Path(job.getWorkingDirectory(), "partitions_" + UUID.randomUUID());
    LOG.info("Writing partition information to " + partitionsPath);
    FileSystem fs = partitionsPath.getFileSystem(conf);
    writePartitions(conf, partitionsPath, startKeys);
    partitionsPath.makeQualified(fs);

    URI cacheUri;
    try {
        // Below we make explicit reference to the bundled TOP. It's cheating.
        // We assume the definition in the hbase-bundled TOP is as it is in
        // hadoop (whether 0.20 or 0.22, etc.)
        /*
        cacheUri = new URI(partitionsPath.toString() + "#" +
            org.apache.hadoop.hbase.mapreduce.hadoopbackport.TotalOrderPartitioner.DEFAULT_PATH);
        */
        cacheUri = new URI(partitionsPath.toString() + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }
    DistributedCache.addCacheFile(cacheUri, conf);
    DistributedCache.createSymlink(conf);

    // Set compression algorithms based on column families
    configureCompression(table, conf);

    TableMapReduceUtil.addDependencyJars(job);
    LOG.info("Incremental table output configured.");
}
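A hedged sketch of how this helper might be invoked (the job name, table name, and HTable construction are illustrative assumptions, not taken from the source file):

Configuration conf = HBaseConfiguration.create();
Job job = new Job(conf, "single-column-import");
// must be KeyValue or Put, chosen before configuring the incremental load
job.setMapOutputValueClass(KeyValue.class);
HTable table = new HTable(conf, "my_table");
SingleColumnImportTsv.configureIncrementalLoad(job, table);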
From source file:com.alectenharmsel.research.WholeBlockRecordReader.java
License:Apache License
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (!processed) {
        System.err.println("start is " + start);
        Path file = fileSplit.getPath();
        String tmp = file.toString();
        System.err.println("File: " + tmp);
        currKey.set(tmp);
        System.err.println("Reached this point");
        FileSystem fs = file.getFileSystem(conf);
        System.err.println("fs blocksize: " + fs.getDefaultBlockSize(file));
        System.err.println("linecount blocksize: " + blockSize);
        byte[] contents;
        FSDataInputStream in = null;
        try {
            in = fs.open(file);
            System.err.println("getPos(): " + in.getPos());
            if ((start + blockSize) > fileLength) {
                blockSize = (int) (fileLength - start);
                processed = true;
            }
            contents = new byte[blockSize];
            //IOUtils.readFully(in, contents, start, blockSize);
            //IOUtils.readFully(in, contents, 0, blockSize);
            in.readFully(start, contents);
            start += blockSize;
            currValue.set(contents);
        } finally {
            IOUtils.closeStream(in);
        }
        return true;
    }
    return false;
}