Example usage for org.apache.hadoop.fs Path toString

List of usage examples for org.apache.hadoop.fs Path toString

Introduction

On this page you can find example usage of org.apache.hadoop.fs.Path.toString().

Prototype

@Override
public String toString()
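
Path.toString() returns the string form of the path, including the scheme and authority when the path is fully qualified. A minimal sketch of what that looks like (the class name, host, and file names below are hypothetical):

import org.apache.hadoop.fs.Path;

public class PathToStringExample {
    public static void main(String[] args) {
        // A fully qualified path keeps its scheme and authority in the string form.
        Path qualified = new Path("hdfs://namenode:8020/data/input/part-00000");
        System.out.println(qualified.toString()); // hdfs://namenode:8020/data/input/part-00000

        // A relative path is rendered as given, without qualification.
        Path relative = new Path("data", "input");
        System.out.println(relative.toString()); // data/input
    }
}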

Usage

From source file: co.cask.hydrator.plugin.batch.source.ExcelReaderRegexFilter.java

License: Apache License

@Override
public boolean accept(Path path) {
    try {
        fs = FileSystem.get(path.toUri(), conf);
        if (fs.isDirectory(path)) {
            return true;
        }

        boolean patternMatch = true;
        Matcher matcher = pattern.matcher(path.toString());
        patternMatch = matcher.find();
        if (patternMatch && !conf.getBoolean(RE_PROCESS, false)
                && CollectionUtils.isNotEmpty(preProcessedFileList)) {
            patternMatch = !preProcessedFileList.contains(path.toString());
        }

        return patternMatch;
    } catch (IOException e) {
        return false;
    }
}
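
A filter like this is normally registered on the job's input format (org.apache.hadoop.mapreduce.lib.input.FileInputFormat) so that Hadoop consults accept() for each candidate path while listing input files. A minimal sketch, assuming an existing Configuration named conf and a hypothetical input directory:

Job job = Job.getInstance(conf, "excel-reader");
FileInputFormat.addInputPath(job, new Path("/data/excel"));
// Hadoop instantiates the filter class and calls accept() for every path it lists.
FileInputFormat.setInputPathFilter(job, ExcelReaderRegexFilter.class);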

From source file: co.cask.hydrator.plugin.common.BatchFileFilter.java

License: Apache License

@Override
public boolean accept(Path path) {
    String filePathName = path.toString();
    // If a directory is given, the path filter first accepts the directory itself
    if (filePathName.equals(pathName) || filePathName.equals(pathName + "/")) {
        return true;
    }

    //filter by file name using regex from configuration
    if (!useTimeFilter) {
        String fileName = path.getName();
        Matcher matcher = regex.matcher(fileName);
        return matcher.matches();
    }

    //use hourly time filter
    if (lastRead.equals("-1")) {
        String currentTime = sdf.format(prevHour);
        return filePathName.contains(currentTime);
    }

    //use stateful time filter
    Date fileDate;
    String filename = path.getName();
    try {
        fileDate = sdf.parse(filename.substring(0, DATE_LENGTH));
    } catch (Exception pe) {
        //Try to parse cloudfront format
        try {
            int startIndex = filename.indexOf(".") + 1;
            fileDate = sdf.parse(filename.substring(startIndex, startIndex + DATE_LENGTH));
        } catch (Exception e) {
            LOG.warn("Couldn't parse file: " + filename);
            return false;
        }
    }
    return isWithinRange(fileDate);
}

From source file: co.cask.hydrator.plugin.common.BatchXMLFileFilter.java

License: Apache License

@Override
public boolean accept(Path path) {
    String filePathName = path.toString();
    // If a directory is given, the path filter first accepts the directory itself
    if (filePathName.equals(pathName)) {
        return true;
    }
    Matcher matcher = regex.matcher(path.getName());
    boolean patternMatch = matcher.find();
    if (patternMatch && CollectionUtils.isNotEmpty(preProcessedFileList)) {
        patternMatch = !preProcessedFileList.contains(filePathName);
    }
    return patternMatch;
}

From source file: co.cask.tephra.persist.HDFSUtil.java

License: Apache License

/**
 * Call HDFS-4525 isFileClosed if it is available.
 * @param dfs Filesystem instance to use.
 * @param m Method instance to call.
 * @param p Path of the file to check.
 * @return True if file is closed.
 */
private boolean isFileClosed(final DistributedFileSystem dfs, final Method m, final Path p) {
    try {
        return (Boolean) m.invoke(dfs, p);
    } catch (SecurityException e) {
        LOG.warn("No access", e);
    } catch (Exception e) {
        LOG.warn("Failed invocation for " + p.toString(), e);
    }
    return false;
}
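
The Method handle passed to this helper is typically resolved once by reflection, so the code still loads on Hadoop versions that predate HDFS-4525. A minimal sketch of that lookup (variable names here are illustrative):

Method isFileClosedMethod;
try {
    // Only present on Hadoop releases that include HDFS-4525.
    isFileClosedMethod = DistributedFileSystem.class.getMethod("isFileClosed", Path.class);
} catch (NoSuchMethodException e) {
    isFileClosedMethod = null; // fall back to treating the file as possibly still open
}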

From source file: co.cask.tigon.data.hbase.HBaseTestBase.java

License: Apache License

public Path createHBaseRootDir(Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    Path hbaseRootdir = new Path(fs.makeQualified(fs.getHomeDirectory()), "hbase");
    conf.set(HConstants.HBASE_DIR, hbaseRootdir.toString());
    fs.mkdirs(hbaseRootdir);
    FSUtils.setVersion(fs, hbaseRootdir);
    return hbaseRootdir;
}

From source file: co.nubetech.hiho.merge.MergeKeyMapper.java

License: Apache License

@Override
protected void setup(Mapper.Context context) throws IOException, InterruptedException {

    Configuration conf = context.getConfiguration();
    InputSplit is = context.getInputSplit();
    FileSplit fs = (FileSplit) is;
    Path splitPath = fs.getPath();

    if (splitPath.toString().contains(conf.get(HIHOConf.MERGE_OLD_PATH))) {
        isOld = true;
    } else if (splitPath.toString().contains(conf.get(HIHOConf.MERGE_NEW_PATH))) {
        isOld = false;
    } else {
        throw new IOException("File " + splitPath + " is not under new path" + conf.get(HIHOConf.MERGE_NEW_PATH)
                + " and old path" + conf.get(HIHOConf.MERGE_OLD_PATH));
    }
}

From source file: ColumnStorage.ColumnProject.java

License: Open Source License

public ColumnProject(Path path, Configuration conf) throws Exception {
    String name = path.toString() + ConstVar.Navigator;
    Path naviPath = new Path(name);

    this.conf = conf;
    FileSystem fs = FileSystem.get(conf);

    loadColmnInfoFromHeadInfo(fs, path);

}

From source file: ColumnStorage.ColumnProject.java

License: Open Source License

void loadColmnInfoFromNavigator(FileSystem fs, Path naviPath) throws Exception {
    FSDataInputStream in = fs.open(naviPath);

    int magic = in.readInt();
    if (magic != ConstVar.NaviMagic) {
        throw new SEException.ErrorFileFormat("invalid navi magic:" + magic + ",file:" + naviPath.toString());
    }

    short infoNum = in.readShort();
    for (int i = 0; i < infoNum; i++) {
        infos.add(loadColumnInfo(in));
    }
}

From source file: com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java

License: Apache License

/**
 * Configure a MapReduce Job to perform an incremental load into the given
 * table. This
 * <ul>
 *   <li>Inspects the table to configure a total order partitioner</li>
 *   <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
 *   <li>Sets the number of reduce tasks to match the current number of regions</li>
 *   <li>Sets the output key/value class to match HFileOutputFormat's requirements</li>
 *   <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
 *     PutSortReducer)</li>
 * </ul>
 * The user should be sure to set the map output value class to either KeyValue or Put before
 * running this function.
 */
public static void configureIncrementalLoad(Job job, HTable table) throws IOException {
    Configuration conf = job.getConfiguration();
    Class<? extends Partitioner> topClass;
    try {
        topClass = getTotalOrderPartitionerClass();
    } catch (ClassNotFoundException e) {
        throw new IOException("Failed getting TotalOrderPartitioner", e);
    }
    // Partition with the total order partitioner
    job.setPartitionerClass(topClass);
    // Set the key class for the job output data
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    // Set the value class for job outputs
    job.setOutputValueClass(KeyValue.class);
    // The output format writes HFiles
    job.setOutputFormatClass(HFileOutputFormat2.class);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(SingleColumnReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    LOG.info("Looking up current regions for table " + table);
    // Look up the start key of each region in the table
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count");

    // One reduce task per region
    job.setNumReduceTasks(startKeys.size());

    Path partitionsPath = new Path(job.getWorkingDirectory(), "partitions_" + UUID.randomUUID());
    LOG.info("Writing partition information to " + partitionsPath);

    FileSystem fs = partitionsPath.getFileSystem(conf);
    writePartitions(conf, partitionsPath, startKeys);
    partitionsPath.makeQualified(fs);

    URI cacheUri;
    try {
        // Below we make explicit reference to the bundled TOP. It's cheating:
        // we assume the definition in the hbase-bundled TOP is the same as in
        // hadoop (whether 0.20 or 0.22, etc.)
        /*
          cacheUri = new URI(partitionsPath.toString() + "#" +
            org.apache.hadoop.hbase.mapreduce.hadoopbackport.TotalOrderPartitioner.DEFAULT_PATH);
            */
        cacheUri = new URI(partitionsPath.toString() + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }
    DistributedCache.addCacheFile(cacheUri, conf);
    DistributedCache.createSymlink(conf);

    // Set compression algorithms based on column families
    configureCompression(table, conf);

    TableMapReduceUtil.addDependencyJars(job);
    LOG.info("Incremental table output configured.");
}
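
A driver would typically set the mapper and map output classes before calling this method; a minimal sketch, assuming a configured HTable named table (the mapper class and output directory below are hypothetical):

Job job = Job.getInstance(conf, "single-column-bulk-load");
job.setMapperClass(SingleColumnImporterMapper.class); // hypothetical mapper
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(KeyValue.class); // selects KeyValueSortReducer above
configureIncrementalLoad(job, table);
FileOutputFormat.setOutputPath(job, new Path("/tmp/hfiles"));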

From source file: com.alectenharmsel.research.WholeBlockRecordReader.java

License: Apache License

public boolean nextKeyValue() throws IOException, InterruptedException {
    if (!processed) {
        System.err.println("start is " + start);
        Path file = fileSplit.getPath();
        String tmp = file.toString();
        System.err.println("File: " + tmp);
        currKey.set(tmp);
        System.err.println("Reached this point");
        FileSystem fs = file.getFileSystem(conf);
        System.err.println("fs blocksize: " + fs.getDefaultBlockSize(file));
        System.err.println("linecount blocksize: " + blockSize);
        byte[] contents;
        FSDataInputStream in = null;
        try {
            in = fs.open(file);
            System.err.println("getPos(): " + in.getPos());

            if ((start + blockSize) > fileLength) {
                blockSize = (int) (fileLength - start);
                processed = true;
            }

            contents = new byte[blockSize];

            //IOUtils.readFully(in, contents, start, blockSize);
            //IOUtils.readFully(in, contents, 0, blockSize);
            in.readFully(start, contents);

            start += blockSize;

            currValue.set(contents);
        } finally {
            IOUtils.closeStream(in);
        }
        return true;
    }
    return false;
}