Example usage for org.apache.hadoop.fs Path getName

List of usage examples for org.apache.hadoop.fs Path getName

Introduction

On this page you can find example usage for org.apache.hadoop.fs Path.getName().

Prototype

public String getName() 

Document

Returns the final component of this path.
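A minimal sketch, not taken from the sources below, of what getName() returns for a few representative paths; the path strings and class name are illustrative only:

// Minimal sketch illustrating Path.getName(); the paths below are hypothetical examples.
import org.apache.hadoop.fs.Path;

public class PathGetNameExample {
    public static void main(String[] args) {
        Path file = new Path("hdfs://namenode:8020/user/data/events.csv");
        // getName() returns only the final component of the path.
        System.out.println(file.getName());    // events.csv

        Path dir = new Path("/user/data/");
        System.out.println(dir.getName());     // data

        // The rest of the path is available via getParent().
        System.out.println(file.getParent());  // hdfs://namenode:8020/user/data
    }
}

As the examples on this page show, getName() is typically used to carry a file's base name over to a new destination path or to filter and compare files by name.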

Usage

From source file:com.cloudera.impala.util.TestLoadMetadataUtil.java

License:Apache License

/**
 * Test if it returns the correct file descriptor when the filepath is a normal file
 * without cache.
 */
private void testFileWithoutCache(MethodName methodName) throws IOException {
    Map<FsKey, FileBlocksInfo> perFsFileBlocks = Maps.newHashMap();
    Map<String, List<FileDescriptor>> fileDescMap = Maps.newHashMap();

    Path filePath = createFileInHdfs("file");
    List<FileDescriptor> fileDesclist = null;
    switch (methodName) {
    case LOAD_FILE_DESCRIPTORS:
        fileDesclist = LoadMetadataUtil.loadFileDescriptors(fs_, filePath, null, HdfsFileFormat.TEXT,
                perFsFileBlocks, false, filePath.getName(), null, fileDescMap);
        break;
    case LOAD_VIA_LOCATED_FILE_STATUS:
        fileDesclist = LoadMetadataUtil.loadViaListLocatedStatus(fs_, filePath, null, HdfsFileFormat.TEXT,
                perFsFileBlocks, false, filePath.getName(), null, fileDescMap);
        break;
    case LOAD_VIA_LIST_STATUS_ITERATOR:
        fileDesclist = LoadMetadataUtil.loadViaListStatusIterator(fs_, filePath, null, HdfsFileFormat.TEXT,
                perFsFileBlocks, false, filePath.getName(), null, fileDescMap);
        break;
    default:
        LOG.error("Unsupported enum method name");
        Preconditions.checkState(false);
    }

    for (FsKey key : perFsFileBlocks.keySet()) {
        assertEquals(HDFS_BASE_PATH, key.toString());
    }

    FileStatus fileStatus = fs_.getFileStatus(filePath);
    assertEquals(1, fileDesclist.size());
    assertEquals(filePath.getName(), fileDesclist.get(0).getFileName());
    assertEquals(fileStatus.getLen(), fileDesclist.get(0).getFileLength());
    assertEquals(fileStatus.getModificationTime(), fileDesclist.get(0).getModificationTime());
}

From source file:com.cloudera.impala.util.TestLoadMetadataUtil.java

License:Apache License

/**
 * Test if it returns the same file descriptor when the filepath is a normal file with
 * cache.
 */
private void testFileWithCache(MethodName methodName) throws IOException {
    Map<FsKey, FileBlocksInfo> perFsFileBlocks = Maps.newHashMap();
    Map<String, List<FileDescriptor>> fileDescMap = Maps.newHashMap();

    // Create old file description map
    Path cacheFilePath = createFileInHdfs("fileWithCache");
    Map<String, List<FileDescriptor>> oldFileDescMap = Maps.newHashMap();
    List<FileDescriptor> cacheList = new LinkedList<FileDescriptor>();
    FileStatus fileStatus = fs_.getFileStatus(cacheFilePath);
    FileDescriptor fdInCache = new FileDescriptor(cacheFilePath.getName(), fileStatus.getLen(),
            fileStatus.getModificationTime());
    cacheList.add(fdInCache);
    oldFileDescMap.put(fileStatus.getPath().getParent().toString(), cacheList);
    List<FileDescriptor> fileDesclist = null;
    switch (methodName) {
    case LOAD_FILE_DESCRIPTORS:
        fileDesclist = LoadMetadataUtil.loadFileDescriptors(fs_, cacheFilePath, oldFileDescMap,
                HdfsFileFormat.TEXT, perFsFileBlocks, false, cacheFilePath.getName(), null, fileDescMap);
        break;
    case LOAD_VIA_LOCATED_FILE_STATUS:
        fileDesclist = LoadMetadataUtil.loadViaListLocatedStatus(fs_, cacheFilePath, oldFileDescMap,
                HdfsFileFormat.TEXT, perFsFileBlocks, false, cacheFilePath.getName(), null, fileDescMap);
        break;
    case LOAD_VIA_LIST_STATUS_ITERATOR:
        fileDesclist = LoadMetadataUtil.loadViaListStatusIterator(fs_, cacheFilePath, oldFileDescMap,
                HdfsFileFormat.TEXT, perFsFileBlocks, false, cacheFilePath.getName(), null, fileDescMap);
        break;
    default:
        LOG.error("Unsupported enum method name");
        Preconditions.checkState(false);
    }

    for (FsKey key : perFsFileBlocks.keySet()) {
        assertEquals(HDFS_BASE_PATH, key.toString());
    }
    assertEquals(1, fileDesclist.size());
    assertEquals(fdInCache, fileDesclist.get(0));
}

From source file:com.cloudera.kitten.lua.AsapLuaContainerLaunchParameters.java

License:Open Source License

private void configureLocalScriptResourceForPath(LocalResource rsrc, Path path) throws IOException {
    //System.out.println("URI: "+path.toUri());
    FileSystem fs = FileSystem.get(conf);

    Path dst = new Path(dir + "/" + path.getName());
    fs.moveFromLocalFile(path, dst);
    dst = fs.makeQualified(dst);

    FileStatus stat = fs.getFileStatus(dst);
    rsrc.setSize(stat.getLen());
    rsrc.setTimestamp(stat.getModificationTime());
    rsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
}

From source file:com.cloudera.kitten.lua.AsapLuaContainerLaunchParameters.java

License:Open Source License

private NamedLocalResource constructResource(LuaPair lp) throws IOException {
    LocalResource rsrc = Records.newRecord(LocalResource.class);
    LuaWrapper value = new LuaWrapper(lp.value.checktable());
    String name = lp.key.isint() ? "" : lp.key.tojstring();
    if (value.isNil(LuaFields.LOCAL_RESOURCE_TYPE)) {
        rsrc.setType(LocalResourceType.FILE);
    } else {
        rsrc.setType(LocalResourceType.valueOf(value.getString(LuaFields.LOCAL_RESOURCE_TYPE).toUpperCase()));
    }
    if (value.isNil(LuaFields.LOCAL_RESOURCE_VISIBILITY)) {
        rsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    } else {
        rsrc.setVisibility(LocalResourceVisibility
                .valueOf(value.getString(LuaFields.LOCAL_RESOURCE_VISIBILITY).toUpperCase()));
    }
    if (!value.isNil(LuaFields.LOCAL_RESOURCE_URL)) {
        URI uri = URI.create(value.getString(LuaFields.LOCAL_RESOURCE_URL));
        rsrc.setResource(ConverterUtils.getYarnUrlFromURI(uri));
        if (name.isEmpty()) {
            name = (new File(uri.getPath())).getName();
        }
    } else if (!value.isNil(LuaFields.LOCAL_RESOURCE_HDFS_FILE)) {
        Path path = new Path(value.getString(LuaFields.LOCAL_RESOURCE_HDFS_FILE));
        configureLocalResourceForPath(rsrc, path);
        if (name.isEmpty()) {
            name = path.getName();
        }
    } else if (!value.isNil(LuaFields.LOCAL_RESOURCE_LOCAL_FILE)) {
        String src = value.getString(LuaFields.LOCAL_RESOURCE_LOCAL_FILE);
        Path path = new Path(localFileUris.get(src));
        configureLocalResourceForPath(rsrc, path);
        if (name.isEmpty()) {
            name = new Path(src).getName();
        }
    } else {
        throw new IllegalArgumentException("Invalid resource: no 'url', 'hdfs', or 'file' fields specified.");
    }
    return new NamedLocalResource(name, rsrc);
}

From source file:com.cloudera.kitten.util.LocalDataHelper.java

License:Open Source License

private void copyToHdfs(String key, String localDataName) throws IOException {
    if (!localToHdfs.containsKey(localDataName)) {
        FileSystem fs = FileSystem.get(conf);
        Path src = new Path(localDataName);
        Path dst = getPath(fs, src.getName());
        InputStream data = getFileOrResource(localDataName);
        FSDataOutputStream os = fs.create(dst, true);
        ByteStreams.copy(data, os);
        os.close();
        URI uri = dst.toUri();
        localToHdfs.put(key, uri);
    }
}

From source file:com.cloudera.oryx.common.servcomp.FilesOrDirsPathFilter.java

License:Open Source License

@Override
public boolean accept(Path maybeListPath) {
    try {
        String name = maybeListPath.getName();
        return !name.endsWith("_SUCCESS") && !name.startsWith(".") && fs.isFile(maybeListPath) == files;
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }
}

From source file:com.cloudera.recordbreaker.analyzer.CSVDataDescriptor.java

License:Open Source License

/**
 * Test whether a given file is amenable to CSV processing
 */
public static boolean isCSV(FileSystem fs, Path p) {
    String fname = p.getName();
    if (fname.endsWith(".csv")) {
        return true;
    }
    CSVParser parser = new CSVParser();
    try {
        BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(p)));
        try {
            int lineCount = 0;
            List<Integer> observedEltCounts = new ArrayList<Integer>();
            int totalEltCount = 0;
            int minEltCount = Integer.MAX_VALUE;
            int maxEltCount = -1;

            String line = null;
            while (lineCount < MAX_LINES && ((line = in.readLine()) != null)) {
                String parts[] = parser.parseLine(line);
                int numElts = parts.length;
                minEltCount = Math.min(minEltCount, numElts);
                maxEltCount = Math.max(maxEltCount, numElts);
                totalEltCount += numElts;
                observedEltCounts.add(numElts);

                lineCount++;
            }
            double meanEltCount = totalEltCount / (1.0 * observedEltCounts.size());
            double totalVariance = 0;
            for (Integer v : observedEltCounts) {
                totalVariance += Math.pow(v - meanEltCount, 2);
            }
            double variance = totalVariance / observedEltCounts.size();
            double stddev = Math.sqrt(variance);
            if (lineCount >= MIN_LINE_COUNT && meanEltCount >= MIN_MEAN_ELTS
                    && ((stddev / meanEltCount) < MAX_ALLOWABLE_LINE_STDDEV)) {
                return true;
            }
        } finally {
            in.close();
        }
    } catch (IOException ie) {
    }
    return false;
}

From source file:com.cloudera.recordbreaker.analyzer.FormatAnalyzer.java

License:Open Source License

/**
 * Create a file-appropriate DataDescriptor instance.
 *
 * Right now we just use the file ending to figure out what to do,
 * but this will become unsatisfactory pretty quickly.
 *
 * @param f a <code>File</code> value
 * @return a <code>DataDescriptor</code> value
 */
public DataDescriptor describeData(FileSystem fs, Path p) throws IOException {
    FileStatus fstatus = fs.getFileStatus(p);
    String fname = p.getName();

    // Test to see if the file is one of a handful of known structured formats.
    if (CSVDataDescriptor.isCSV(fs, p)) {
        return new CSVDataDescriptor(p, fs);
    } else if (fname.endsWith(".xml")) {
        return new XMLDataDescriptor(p, fs);
    } else if (fname.endsWith(".avro")) {
        return new AvroDataDescriptor(p, fs);
    } else if (AvroSequenceFileDataDescriptor.isAvroSequenceFile(fs, p)) {
        return new AvroSequenceFileDataDescriptor(p, fs);
    } else if (SequenceFileDataDescriptor.isSequenceFile(fs, p)) {
        return new SequenceFileDataDescriptor(p, fs);
    } else if (ApacheDataDescriptor.isApacheLogFile(fs, p)) {
        return new ApacheDataDescriptor(p, fs);
    } else if (SyslogDataDescriptor.isSyslogFile(fs, p)) {
        return new SyslogDataDescriptor(p, fs);
    } else {
        // It's not one of the known formats, so apply LearnStructure 
        // to obtain the structure.
        if (UnknownTextDataDescriptor.isTextData(fs, p)) {
            try {
                return new UnknownTextDataDescriptor(fs, p, schemaDbDir);
            } catch (Exception iex) {
                //iex.printStackTrace();
            }
        }
        // If that doesn't work, then give up and call it unstructured.  You
        // can't run queries on data in this format.
        return new UnstructuredFileDescriptor(fs, p);
    }
}

From source file:com.cloudera.recordbreaker.analyzer.FSAnalyzer.java

License:Open Source License

/**
 * <code>addFileMetadata</code> stores the pathname, size, owner, etc.
 */
void addFileMetadata(final FileStatus fstatus, final long crawlId) {
    // Compute strings to represent file metadata
    Path insertFile = fstatus.getPath();
    final boolean isDir = fstatus.isDir();
    FsPermission fsp = fstatus.getPermission();
    final String permissions = (isDir ? "d" : "-") + fsp.getUserAction().SYMBOL + fsp.getGroupAction().SYMBOL
            + fsp.getOtherAction().SYMBOL;

    // Compute formal pathname representation
    String fnameString = null;
    String parentPathString = null;
    if (isDir && insertFile.getParent() == null) {
        parentPathString = "";
        fnameString = insertFile.toString();
    } else {
        fnameString = insertFile.getName();
        parentPathString = insertFile.getParent().toString();

        // REMIND --- mjc --- If we want to modify the Files table s.t. it does
        // not contain the filesystem prefix, then this would be the place to do it.

        if (!parentPathString.endsWith("/")) {
            parentPathString = parentPathString + "/";
        }
    }
    final String parentPath = parentPathString;
    final String fName = fnameString;
    final long fileId = dbQueue.execute(new SQLiteJob<Long>() {
        protected Long job(SQLiteConnection db) throws SQLiteException {
            SQLiteStatement stmt = db.prepare("INSERT into Files VALUES(null, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
            try {
                stmt.bind(1, isDir ? "True" : "False").bind(2, crawlId).bind(3, fName)
                        .bind(4, fstatus.getOwner()).bind(5, fstatus.getGroup()).bind(6, permissions)
                        .bind(7, fstatus.getLen())
                        .bind(8, fileDateFormat.format(new Date(fstatus.getModificationTime())))
                        .bind(9, parentPath);
                stmt.step();
                return db.getLastInsertId();
            } finally {
                stmt.dispose();
            }
        }
    }).complete();
}

From source file:com.cloudera.science.quince.LoadVariantsTool.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    JCommander jc = new JCommander(this);
    try {
        jc.parse(args);
    } catch (ParameterException e) {
        jc.usage();
        return 1;
    }

    if (paths == null || paths.size() != 2) {
        jc.usage();
        return 1;
    }

    String inputPath = paths.get(0);
    String outputPath = paths.get(1);

    Configuration conf = getConf();
    // Copy records to avoid problem with Parquet string statistics not being correct.
    // This can be removed from parquet 1.8.0
    // (see https://issues.apache.org/jira/browse/PARQUET-251).
    conf.setBoolean(DatasetKeyOutputFormat.KITE_COPY_RECORDS, true);

    Path path = new Path(inputPath);

    if (path.getName().endsWith(".vcf")) {
        int size = 500000;
        byte[] bytes = new byte[size];
        InputStream inputStream = path.getFileSystem(conf).open(path);
        inputStream.read(bytes, 0, size);
        conf.set(VariantContextToVariantFn.VARIANT_HEADER, Base64.encodeBase64String(bytes));
    }

    Pipeline pipeline = new MRPipeline(getClass(), conf);
    PCollection<Variant> records = readVariants(path, conf, pipeline);

    PCollection<FlatVariant> flatRecords = records.parallelDo(new FlattenVariantFn(),
            Avros.specifics(FlatVariant.class));

    DatasetDescriptor desc = new DatasetDescriptor.Builder().schema(FlatVariant.getClassSchema())
            .partitionStrategy(buildPartitionStrategy(segmentSize)).format(Formats.PARQUET)
            .compressionType(CompressionType.Uncompressed).build();

    View<FlatVariant> dataset;
    if (Datasets.exists(outputPath)) {
        dataset = Datasets.load(outputPath, FlatVariant.class).getDataset().with("sample_group", sampleGroup);
    } else {
        dataset = Datasets.create(outputPath, desc, FlatVariant.class).getDataset().with("sample_group",
                sampleGroup);
    }

    int numReducers = conf.getInt("mapreduce.job.reduces", 1);
    System.out.println("Num reducers: " + numReducers);

    final Schema sortKeySchema = SchemaBuilder.record("sortKey").fields().requiredString("sampleId")
            .endRecord();

    PCollection<FlatVariant> partitioned = CrunchDatasets.partitionAndSort(flatRecords, dataset,
            new FlatVariantRecordMapFn(sortKeySchema), sortKeySchema, numReducers, 1);

    try {
        Target.WriteMode writeMode = overwrite ? Target.WriteMode.OVERWRITE : Target.WriteMode.DEFAULT;
        pipeline.write(partitioned, CrunchDatasets.asTarget(dataset), writeMode);
    } catch (CrunchRuntimeException e) {
        LOG.error("Crunch runtime error", e);
        return 1;
    }

    PipelineResult result = pipeline.done();
    return result.succeeded() ? 0 : 1;

}