List of usage examples for org.apache.hadoop.fs.Path#getName()
public String getName()
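getName() returns the final component of the path, i.e. the file or directory name after the last '/', with the scheme, authority, and parent directories stripped. A minimal sketch before the real-world examples below (the HDFS URI is illustrative):

import org.apache.hadoop.fs.Path;

public class PathGetNameDemo {
    public static void main(String[] args) {
        Path p = new Path("hdfs://namenode:8020/user/alice/data/part-00000.csv");
        // getName() keeps only the last path component.
        System.out.println(p.getName());             // part-00000.csv
        // getParent() walks up one level; its name is the directory name.
        System.out.println(p.getParent().getName()); // data
    }
}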
From source file:com.cloudera.impala.util.TestLoadMetadataUtil.java
License:Apache License
/**
 * Test if it returns the correct file descriptor when the filepath is a normal file
 * without cache.
 */
private void testFileWithoutCache(MethodName methodName) throws IOException {
    Map<FsKey, FileBlocksInfo> perFsFileBlocks = Maps.newHashMap();
    Map<String, List<FileDescriptor>> fileDescMap = Maps.newHashMap();
    Path filePath = createFileInHdfs("file");
    List<FileDescriptor> fileDesclist = null;
    switch (methodName) {
    case LOAD_FILE_DESCRIPTORS:
        fileDesclist = LoadMetadataUtil.loadFileDescriptors(fs_, filePath, null, HdfsFileFormat.TEXT,
                perFsFileBlocks, false, filePath.getName(), null, fileDescMap);
        break;
    case LOAD_VIA_LOCATED_FILE_STATUS:
        fileDesclist = LoadMetadataUtil.loadViaListLocatedStatus(fs_, filePath, null, HdfsFileFormat.TEXT,
                perFsFileBlocks, false, filePath.getName(), null, fileDescMap);
        break;
    case LOAD_VIA_LIST_STATUS_ITERATOR:
        fileDesclist = LoadMetadataUtil.loadViaListStatusIterator(fs_, filePath, null, HdfsFileFormat.TEXT,
                perFsFileBlocks, false, filePath.getName(), null, fileDescMap);
        break;
    default:
        LOG.error("Unsupported enum method name");
        Preconditions.checkState(false);
    }
    for (FsKey key : perFsFileBlocks.keySet()) {
        assertEquals(HDFS_BASE_PATH, key.toString());
    }
    FileStatus fileStatus = fs_.getFileStatus(filePath);
    assertEquals(1, fileDesclist.size());
    assertEquals(filePath.getName(), fileDesclist.get(0).getFileName());
    assertEquals(fileStatus.getLen(), fileDesclist.get(0).getFileLength());
    assertEquals(fileStatus.getModificationTime(), fileDesclist.get(0).getModificationTime());
}
From source file:com.cloudera.impala.util.TestLoadMetadataUtil.java
License:Apache License
/**
 * Test if it returns the same file descriptor when the filepath is a normal file with
 * cache.
 */
private void testFileWithCache(MethodName methodName) throws IOException {
    Map<FsKey, FileBlocksInfo> perFsFileBlocks = Maps.newHashMap();
    Map<String, List<FileDescriptor>> fileDescMap = Maps.newHashMap();
    // Create old file description map
    Path cacheFilePath = createFileInHdfs("fileWithCache");
    Map<String, List<FileDescriptor>> oldFileDescMap = Maps.newHashMap();
    List<FileDescriptor> cacheList = new LinkedList<FileDescriptor>();
    FileStatus fileStatus = fs_.getFileStatus(cacheFilePath);
    FileDescriptor fdInCache = new FileDescriptor(cacheFilePath.getName(), fileStatus.getLen(),
            fileStatus.getModificationTime());
    cacheList.add(fdInCache);
    oldFileDescMap.put(fileStatus.getPath().getParent().toString(), cacheList);
    List<FileDescriptor> fileDesclist = null;
    switch (methodName) {
    case LOAD_FILE_DESCRIPTORS:
        fileDesclist = LoadMetadataUtil.loadFileDescriptors(fs_, cacheFilePath, oldFileDescMap,
                HdfsFileFormat.TEXT, perFsFileBlocks, false, cacheFilePath.getName(), null, fileDescMap);
        break;
    case LOAD_VIA_LOCATED_FILE_STATUS:
        fileDesclist = LoadMetadataUtil.loadViaListLocatedStatus(fs_, cacheFilePath, oldFileDescMap,
                HdfsFileFormat.TEXT, perFsFileBlocks, false, cacheFilePath.getName(), null, fileDescMap);
        break;
    case LOAD_VIA_LIST_STATUS_ITERATOR:
        fileDesclist = LoadMetadataUtil.loadViaListStatusIterator(fs_, cacheFilePath, oldFileDescMap,
                HdfsFileFormat.TEXT, perFsFileBlocks, false, cacheFilePath.getName(), null, fileDescMap);
        break;
    default:
        LOG.error("Unsupported enum method name");
        Preconditions.checkState(false);
    }
    for (FsKey key : perFsFileBlocks.keySet()) {
        assertEquals(HDFS_BASE_PATH, key.toString());
    }
    assertEquals(1, fileDesclist.size());
    assertEquals(fdInCache, fileDesclist.get(0));
}
From source file:com.cloudera.kitten.lua.AsapLuaContainerLaunchParameters.java
License:Open Source License
private void configureLocalScriptResourceForPath(LocalResource rsrc, Path path) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    // Move the local script into the job's HDFS working directory, keeping its name.
    Path dst = new Path(dir + "/" + path.getName());
    fs.moveFromLocalFile(path, dst);
    dst = fs.makeQualified(dst);
    // The LocalResource must carry the exact size and timestamp of the HDFS copy,
    // or YARN will refuse to localize it.
    FileStatus stat = fs.getFileStatus(dst);
    rsrc.setSize(stat.getLen());
    rsrc.setTimestamp(stat.getModificationTime());
    rsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
}
From source file:com.cloudera.kitten.lua.AsapLuaContainerLaunchParameters.java
License:Open Source License
private NamedLocalResource constructResource(LuaPair lp) throws IOException {
    LocalResource rsrc = Records.newRecord(LocalResource.class);
    LuaWrapper value = new LuaWrapper(lp.value.checktable());
    String name = lp.key.isint() ? "" : lp.key.tojstring();

    if (value.isNil(LuaFields.LOCAL_RESOURCE_TYPE)) {
        rsrc.setType(LocalResourceType.FILE);
    } else {
        rsrc.setType(LocalResourceType.valueOf(value.getString(LuaFields.LOCAL_RESOURCE_TYPE).toUpperCase()));
    }
    if (value.isNil(LuaFields.LOCAL_RESOURCE_VISIBILITY)) {
        rsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    } else {
        rsrc.setVisibility(
                LocalResourceVisibility.valueOf(value.getString(LuaFields.LOCAL_RESOURCE_VISIBILITY).toUpperCase()));
    }

    if (!value.isNil(LuaFields.LOCAL_RESOURCE_URL)) {
        URI uri = URI.create(value.getString(LuaFields.LOCAL_RESOURCE_URL));
        rsrc.setResource(ConverterUtils.getYarnUrlFromURI(uri));
        if (name.isEmpty()) {
            name = (new File(uri.getPath())).getName();
        }
    } else if (!value.isNil(LuaFields.LOCAL_RESOURCE_HDFS_FILE)) {
        Path path = new Path(value.getString(LuaFields.LOCAL_RESOURCE_HDFS_FILE));
        configureLocalResourceForPath(rsrc, path);
        if (name.isEmpty()) {
            name = path.getName();
        }
    } else if (!value.isNil(LuaFields.LOCAL_RESOURCE_LOCAL_FILE)) {
        String src = value.getString(LuaFields.LOCAL_RESOURCE_LOCAL_FILE);
        Path path = new Path(localFileUris.get(src));
        configureLocalResourceForPath(rsrc, path);
        if (name.isEmpty()) {
            name = new Path(src).getName();
        }
    } else {
        throw new IllegalArgumentException("Invalid resource: no 'url', 'hdfs', or 'file' fields specified.");
    }
    return new NamedLocalResource(name, rsrc);
}
From source file:com.cloudera.kitten.util.LocalDataHelper.java
License:Open Source License
private void copyToHdfs(String key, String localDataName) throws IOException {
    if (!localToHdfs.containsKey(localDataName)) {
        FileSystem fs = FileSystem.get(conf);
        Path src = new Path(localDataName);
        Path dst = getPath(fs, src.getName());
        InputStream data = getFileOrResource(localDataName);
        FSDataOutputStream os = fs.create(dst, true);
        ByteStreams.copy(data, os);
        os.close();
        URI uri = dst.toUri();
        localToHdfs.put(key, uri);
    }
}
From source file:com.cloudera.oryx.common.servcomp.FilesOrDirsPathFilter.java
License:Open Source License
@Override
public boolean accept(Path maybeListPath) {
    try {
        String name = maybeListPath.getName();
        return !name.endsWith("_SUCCESS") && !name.startsWith(".") && fs.isFile(maybeListPath) == files;
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }
}
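A PathFilter like this is typically handed to FileSystem.listStatus so that job markers such as _SUCCESS and hidden dotfiles never reach the caller. A minimal sketch of that wiring (the directory path and configuration are illustrative):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class ListDataFiles {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        // Skip job markers and hidden entries, mirroring the filter above.
        PathFilter filter = new PathFilter() {
            @Override
            public boolean accept(Path p) {
                String name = p.getName();
                return !name.endsWith("_SUCCESS") && !name.startsWith(".");
            }
        };
        for (FileStatus status : fs.listStatus(new Path("/data/output"), filter)) {
            System.out.println(status.getPath().getName());
        }
    }
}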
From source file:com.cloudera.recordbreaker.analyzer.CSVDataDescriptor.java
License:Open Source License
/**
 * Test whether a given file is amenable to CSV processing.
 */
public static boolean isCSV(FileSystem fs, Path p) {
    String fname = p.getName();
    if (fname.endsWith(".csv")) {
        return true;
    }
    CSVParser parser = new CSVParser();
    try {
        BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(p)));
        try {
            int lineCount = 0;
            List<Integer> observedEltCounts = new ArrayList<Integer>();
            int totalEltCount = 0;
            int minEltCount = Integer.MAX_VALUE;
            int maxEltCount = -1;
            String line = null;
            while (lineCount < MAX_LINES && ((line = in.readLine()) != null)) {
                String[] parts = parser.parseLine(line);
                int numElts = parts.length;
                minEltCount = Math.min(minEltCount, numElts);
                maxEltCount = Math.max(maxEltCount, numElts);
                totalEltCount += numElts;
                observedEltCounts.add(numElts);
                lineCount++;
            }
            // Accept the file as CSV if enough lines were sampled, the mean field
            // count is high enough, and the per-line field count is consistent
            // (low standard deviation relative to the mean).
            double meanEltCount = totalEltCount / (1.0 * observedEltCounts.size());
            double totalVariance = 0;
            for (Integer v : observedEltCounts) {
                totalVariance += Math.pow(v - meanEltCount, 2);
            }
            double variance = totalVariance / observedEltCounts.size();
            double stddev = Math.sqrt(variance);
            if (lineCount >= MIN_LINE_COUNT && meanEltCount >= MIN_MEAN_ELTS
                    && ((stddev / meanEltCount) < MAX_ALLOWABLE_LINE_STDDEV)) {
                return true;
            }
        } finally {
            in.close();
        }
    } catch (IOException ie) {
        // Unreadable or unparseable as text; fall through and report non-CSV.
    }
    return false;
}
From source file:com.cloudera.recordbreaker.analyzer.FormatAnalyzer.java
License:Open Source License
/**
 * Create a file-appropriate DataDescriptor instance.
 *
 * Right now we just use the file ending to figure out what to do,
 * but this will become unsatisfactory pretty quickly.
 *
 * @param fs the <code>FileSystem</code> that holds the file
 * @param p the file's <code>Path</code>
 * @return a <code>DataDescriptor</code> value
 */
public DataDescriptor describeData(FileSystem fs, Path p) throws IOException {
    FileStatus fstatus = fs.getFileStatus(p);
    String fname = p.getName();

    // Test to see if the file is one of a handful of known structured formats.
    if (CSVDataDescriptor.isCSV(fs, p)) {
        return new CSVDataDescriptor(p, fs);
    } else if (fname.endsWith(".xml")) {
        return new XMLDataDescriptor(p, fs);
    } else if (fname.endsWith(".avro")) {
        return new AvroDataDescriptor(p, fs);
    } else if (AvroSequenceFileDataDescriptor.isAvroSequenceFile(fs, p)) {
        return new AvroSequenceFileDataDescriptor(p, fs);
    } else if (SequenceFileDataDescriptor.isSequenceFile(fs, p)) {
        return new SequenceFileDataDescriptor(p, fs);
    } else if (ApacheDataDescriptor.isApacheLogFile(fs, p)) {
        return new ApacheDataDescriptor(p, fs);
    } else if (SyslogDataDescriptor.isSyslogFile(fs, p)) {
        return new SyslogDataDescriptor(p, fs);
    } else {
        // It's not one of the known formats, so apply LearnStructure
        // to obtain the structure.
        if (UnknownTextDataDescriptor.isTextData(fs, p)) {
            try {
                return new UnknownTextDataDescriptor(fs, p, schemaDbDir);
            } catch (Exception iex) {
                // Structure inference failed; fall through to unstructured handling.
            }
        }
        // If that doesn't work, then give up and call it unstructured. You
        // can't run queries on data in this format.
        return new UnstructuredFileDescriptor(fs, p);
    }
}
From source file:com.cloudera.recordbreaker.analyzer.FSAnalyzer.java
License:Open Source License
/**
 * <code>addFileMetadata</code> stores the pathname, size, owner, etc.
 */
void addFileMetadata(final FileStatus fstatus, final long crawlId) {
    // Compute strings to represent file metadata
    Path insertFile = fstatus.getPath();
    final boolean isDir = fstatus.isDir();
    FsPermission fsp = fstatus.getPermission();
    final String permissions = (isDir ? "d" : "-") + fsp.getUserAction().SYMBOL + fsp.getGroupAction().SYMBOL
            + fsp.getOtherAction().SYMBOL;

    // Compute formal pathname representation
    String fnameString = null;
    String parentPathString = null;
    if (isDir && insertFile.getParent() == null) {
        parentPathString = "";
        fnameString = insertFile.toString();
    } else {
        fnameString = insertFile.getName();
        parentPathString = insertFile.getParent().toString();
        // REMIND --- mjc --- If we want to modify the Files table s.t. it does
        // not contain the filesystem prefix, then this would be the place to do it.
        if (!parentPathString.endsWith("/")) {
            parentPathString = parentPathString + "/";
        }
    }
    final String parentPath = parentPathString;
    final String fName = fnameString;

    final long fileId = dbQueue.execute(new SQLiteJob<Long>() {
        protected Long job(SQLiteConnection db) throws SQLiteException {
            SQLiteStatement stmt = db.prepare("INSERT into Files VALUES(null, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
            try {
                stmt.bind(1, isDir ? "True" : "False").bind(2, crawlId).bind(3, fName)
                        .bind(4, fstatus.getOwner()).bind(5, fstatus.getGroup()).bind(6, permissions)
                        .bind(7, fstatus.getLen())
                        .bind(8, fileDateFormat.format(new Date(fstatus.getModificationTime())))
                        .bind(9, parentPath);
                stmt.step();
                return db.getLastInsertId();
            } finally {
                stmt.dispose();
            }
        }
    }).complete();
}
From source file:com.cloudera.science.quince.LoadVariantsTool.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    JCommander jc = new JCommander(this);
    try {
        jc.parse(args);
    } catch (ParameterException e) {
        jc.usage();
        return 1;
    }
    if (paths == null || paths.size() != 2) {
        jc.usage();
        return 1;
    }
    String inputPath = paths.get(0);
    String outputPath = paths.get(1);

    Configuration conf = getConf();
    // Copy records to avoid a problem with Parquet string statistics not being correct.
    // This can be removed once Parquet 1.8.0 is available
    // (see https://issues.apache.org/jira/browse/PARQUET-251).
    conf.setBoolean(DatasetKeyOutputFormat.KITE_COPY_RECORDS, true);

    Path path = new Path(inputPath);
    if (path.getName().endsWith(".vcf")) {
        // Grab the first 500KB of the file, which is assumed to contain the VCF header.
        // Note that a single InputStream.read() call may return fewer bytes than requested.
        int size = 500000;
        byte[] bytes = new byte[size];
        InputStream inputStream = path.getFileSystem(conf).open(path);
        inputStream.read(bytes, 0, size);
        conf.set(VariantContextToVariantFn.VARIANT_HEADER, Base64.encodeBase64String(bytes));
    }

    Pipeline pipeline = new MRPipeline(getClass(), conf);
    PCollection<Variant> records = readVariants(path, conf, pipeline);
    PCollection<FlatVariant> flatRecords = records.parallelDo(new FlattenVariantFn(),
            Avros.specifics(FlatVariant.class));

    DatasetDescriptor desc = new DatasetDescriptor.Builder().schema(FlatVariant.getClassSchema())
            .partitionStrategy(buildPartitionStrategy(segmentSize)).format(Formats.PARQUET)
            .compressionType(CompressionType.Uncompressed).build();

    View<FlatVariant> dataset;
    if (Datasets.exists(outputPath)) {
        dataset = Datasets.load(outputPath, FlatVariant.class).getDataset().with("sample_group", sampleGroup);
    } else {
        dataset = Datasets.create(outputPath, desc, FlatVariant.class).getDataset().with("sample_group",
                sampleGroup);
    }

    int numReducers = conf.getInt("mapreduce.job.reduces", 1);
    System.out.println("Num reducers: " + numReducers);

    final Schema sortKeySchema = SchemaBuilder.record("sortKey").fields().requiredString("sampleId").endRecord();

    PCollection<FlatVariant> partitioned = CrunchDatasets.partitionAndSort(flatRecords, dataset,
            new FlatVariantRecordMapFn(sortKeySchema), sortKeySchema, numReducers, 1);

    try {
        Target.WriteMode writeMode = overwrite ? Target.WriteMode.OVERWRITE : Target.WriteMode.DEFAULT;
        pipeline.write(partitioned, CrunchDatasets.asTarget(dataset), writeMode);
    } catch (CrunchRuntimeException e) {
        LOG.error("Crunch runtime error", e);
        return 1;
    }

    PipelineResult result = pipeline.done();
    return result.succeeded() ? 0 : 1;
}
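Because a single InputStream.read() may stop short of the requested length, a more defensive version of the header-grabbing step above would loop until the buffer is full or EOF is reached. A sketch under that assumption (class and method names are hypothetical, the 500KB budget matches the example above):

import java.io.IOException;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReadVcfHeaderBytes {
    // Reads up to buf.length bytes, looping until the buffer is full or EOF,
    // so a short read() cannot silently truncate the VCF header.
    static int readHeader(FileSystem fs, Path vcf, byte[] buf) throws IOException {
        try (FSDataInputStream in = fs.open(vcf)) {
            int off = 0;
            while (off < buf.length) {
                int n = in.read(buf, off, buf.length - off);
                if (n < 0) {
                    break; // EOF before the buffer filled; header was shorter than 500KB.
                }
                off += n;
            }
            return off; // number of header bytes actually read
        }
    }
}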