List of usage examples for org.apache.hadoop.fs.Path#getName()
public String getName()
From source file:com.cloudera.science.quince.LoadVariantsTool.java
License:Open Source License
/**
 * Reads variants from {@code path}, choosing the reader by the extension of a
 * representative file found under it (.avro, .parquet, or .vcf).
 *
 * @param path the file or directory to read
 * @param conf Hadoop configuration used to resolve the filesystem
 * @param pipeline the Crunch pipeline that will own the read
 * @return a collection of {@link Variant} records
 * @throws IOException if the representative file cannot be located
 * @throws IllegalStateException if the extension is not a recognized format
 */
private static PCollection<Variant> readVariants(Path path, Configuration conf, Pipeline pipeline) throws IOException {
    Path file = SchemaUtils.findFile(path, conf);
    // The extension of the sample file decides the source type; the read
    // itself still targets the original (possibly directory) path.
    String fileName = file.getName();
    if (fileName.endsWith(".avro")) {
        return pipeline.read(From.avroFile(path, Avros.specifics(Variant.class)));
    }
    if (fileName.endsWith(".parquet")) {
        @SuppressWarnings("unchecked")
        Source<Variant> source = new AvroParquetFileSource(path, Avros.specifics(Variant.class));
        return pipeline.read(source);
    }
    if (fileName.endsWith(".vcf")) {
        TableSource<LongWritable, VariantContextWritable> vcfSource =
                From.formattedFile(path, VCFInputFormat.class, LongWritable.class, VariantContextWritable.class);
        // VCF records need an explicit conversion step into the Variant schema.
        return pipeline.read(vcfSource)
                .parallelDo(new VariantContextToVariantFn(), Avros.specifics(Variant.class));
    }
    throw new IllegalStateException("Unrecognized format for " + file);
}
From source file:com.cloudera.science.quince.SchemaUtils.java
License:Open Source License
/**
 * Maps a path's file extension to a storage {@link Format}.
 *
 * @param path the path whose name is inspected
 * @return {@code Formats.AVRO} for ".avro", {@code Formats.PARQUET} for ".parquet"
 * @throws IllegalStateException if the extension matches neither format
 */
public static Format readFormat(Path path) {
    String name = path.getName();
    if (name.endsWith(".avro")) {
        return Formats.AVRO;
    }
    if (name.endsWith(".parquet")) {
        return Formats.PARQUET;
    }
    throw new IllegalStateException("Unrecognized format for " + path);
}
From source file:com.cloudera.science.quince.SchemaUtils.java
License:Open Source License
/**
 * Resolves {@code path} to a concrete data file. A plain file is returned
 * as-is; for a directory, the first visible entry is returned (entries whose
 * names start with "_" or "." — Hadoop markers like _SUCCESS and hidden
 * files — are skipped).
 *
 * @param path a file or directory
 * @param conf Hadoop configuration used to resolve the filesystem
 * @return a path to a regular data file
 * @throws IOException on filesystem errors
 * @throws IllegalStateException if the directory contains no visible files
 *         (previously this surfaced as an ArrayIndexOutOfBoundsException)
 */
public static Path findFile(Path path, Configuration conf) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    if (fs.isDirectory(path)) {
        FileStatus[] fileStatuses = fs.listStatus(path, new PathFilter() {
            @Override
            public boolean accept(Path p) {
                String name = p.getName();
                // skip Hadoop bookkeeping (_SUCCESS, _logs) and hidden files
                return !name.startsWith("_") && !name.startsWith(".");
            }
        });
        // Guard against an empty (or all-hidden) directory instead of
        // failing with a bare array-index exception.
        if (fileStatuses == null || fileStatuses.length == 0) {
            throw new IllegalStateException("No data files found in directory " + path);
        }
        return fileStatuses[0].getPath();
    } else {
        return path;
    }
}
From source file:com.cloudera.science.quince.VCFToGA4GHVariantFn.java
License:Open Source License
public static void configureHeaders(Configuration conf, Path[] vcfs, String sampleGroup) throws IOException { List<VCFHeader> headers = new ArrayList<>(); for (Path vcf : vcfs) { InputStream inputStream = vcf.getFileSystem(conf).open(vcf); VcfBlockIterator iterator = new VcfBlockIterator(inputStream, new FullVcfCodec()); VCFHeader header = iterator.getHeader(); header.addMetaDataLine(new VCFHeaderLine(VARIANT_SET_ID, vcf.getName())); headers.add(header);//from w ww . java 2 s .c om } VCFHeader[] headersArray = headers.toArray(new VCFHeader[headers.size()]); conf.set(VARIANT_HEADERS, Base64.encodeBase64String(SerializationUtils.serialize(headersArray))); if (sampleGroup != null) { conf.set(SAMPLE_GROUP, sampleGroup); } }
From source file:com.cloudera.seismic.segy.SegyUnloader.java
License:Open Source License
@Override public int run(String[] args) throws Exception { Options options = new Options(); options.addOption("input", true, "SU sequence files to export from Hadoop"); options.addOption("output", true, "The local SU file to write"); // Parse the commandline and check for required arguments. CommandLine cmdLine = new PosixParser().parse(options, args, false); if (!cmdLine.hasOption("input") || !cmdLine.hasOption("output")) { System.out.println("Mising required input/output arguments"); new HelpFormatter().printHelp("SegyUnloader", options); System.exit(1);//from www . j av a 2 s.com } Configuration conf = getConf(); FileSystem hdfs = FileSystem.get(conf); Path inputPath = new Path(cmdLine.getOptionValue("input")); if (!hdfs.exists(inputPath)) { System.out.println("Input path does not exist"); System.exit(1); } PathFilter pf = new PathFilter() { @Override public boolean accept(Path path) { return !path.getName().startsWith("_"); } }; DataOutputStream os = new DataOutputStream(new FileOutputStream(cmdLine.getOptionValue("output"))); for (FileStatus fs : hdfs.listStatus(inputPath, pf)) { write(fs.getPath(), os, conf); } os.close(); return 0; }
From source file:com.cloudera.sqoop.TestAppendUtils.java
License:Apache License
/**
 * Extracts the partition number from an import file name, i.e. the
 * {@code PARTITION_DIGITS} characters that follow the last
 * {@code FILEPART_SEPARATOR}.
 *
 * @param file the partition file
 * @return the parsed partition number, or 0 when the name has no separator
 */
private int getFilePartition(Path file) {
    String filename = file.getName();
    int sepIndex = filename.lastIndexOf(FILEPART_SEPARATOR);
    if (sepIndex == -1) {
        // No separator present: treat as the first (zeroth) partition.
        return 0;
    }
    int digitsStart = sepIndex + 1;
    String digits = filename.substring(digitsStart, digitsStart + PARTITION_DIGITS);
    return Integer.parseInt(digits);
}
From source file:com.cloudera.sqoop.TestExport.java
License:Apache License
/** Export some rows from a SequenceFile, make sure they import correctly. */
public void testSequenceFileExport() throws Exception {
    final int TOTAL_RECORDS = 10;

    // First, generate class and jar files that represent the table
    // we're exporting to.
    LOG.info("Creating initial schema for SeqFile test");
    createTable();
    LOG.info("Generating code...");
    CodeGenTool codeGen = new CodeGenTool();
    String[] codeGenArgs = getCodeGenArgv();
    SqoopOptions options = codeGen.parseArguments(codeGenArgs, null, null, true);
    codeGen.validateOptions(options);
    int ret = codeGen.run(options);
    assertEquals(0, ret);
    List<String> generatedJars = codeGen.getGeneratedJarFiles();

    // Now, wipe the created table so we can export on top of it again.
    LOG.info("Resetting schema and data...");
    createTable();

    // Wipe the directory we use when creating files to export to ensure
    // it's ready for new SequenceFiles.
    removeTablePath();

    // Exactly one jar is expected from the codegen step above.
    assertNotNull(generatedJars);
    assertEquals("Expected 1 generated jar file", 1, generatedJars.size());
    String jarFileName = generatedJars.get(0);
    // Sqoop generates jars named "foo.jar"; by default, this should contain a
    // class named 'foo'. Extract the class name.
    Path jarPath = new Path(jarFileName);
    String jarBaseName = jarPath.getName();
    assertTrue(jarBaseName.endsWith(".jar"));
    assertTrue(jarBaseName.length() > ".jar".length());
    // Strip the ".jar" suffix to recover the generated class name.
    String className = jarBaseName.substring(0, jarBaseName.length() - ".jar".length());

    LOG.info("Using jar filename: " + jarFileName);
    LOG.info("Using class name: " + className);

    ClassLoader prevClassLoader = null;

    try {
        if (null != jarFileName) {
            // Swap in a classloader that can see the generated jar; restored
            // in the finally block below.
            prevClassLoader = ClassLoaderStack.addJarFile(jarFileName, className);
        }

        // Now use this class and jar name to create a sequence file.
        LOG.info("Writing data to SequenceFiles");
        createSequenceFile(0, TOTAL_RECORDS, className);

        // Now run and verify the export.
        LOG.info("Exporting SequenceFile-based data");
        runExport(getArgv(true, 10, 10, "--class-name", className, "--jar-file", jarFileName));
        verifyExport(TOTAL_RECORDS);
    } finally {
        if (null != prevClassLoader) {
            ClassLoaderStack.setCurrentClassLoader(prevClassLoader);
        }
    }
}
From source file:com.cloudera.sqoop.TestMerge.java
License:Apache License
/** * Return true if there's a file in 'dirName' with a line that starts with * 'prefix'.// w w w. j a v a 2s.co m */ protected boolean recordStartsWith(String prefix, String dirName) throws Exception { Path warehousePath = new Path(LOCAL_WAREHOUSE_DIR); Path targetPath = new Path(warehousePath, dirName); FileSystem fs = FileSystem.getLocal(new Configuration()); FileStatus[] files = fs.listStatus(targetPath); if (null == files || files.length == 0) { fail("Got no import files!"); } for (FileStatus stat : files) { Path p = stat.getPath(); if (p.getName().startsWith("part-")) { if (checkFileForLine(fs, p, prefix)) { // We found the line. Nothing further to do. return true; } } } return false; }
From source file:com.cloudera.sqoop.tool.ImportTool.java
License:Apache License
/** * @return the output path for the imported files; * in append mode this will point to a temporary folder. * if importing to hbase, this may return null. *//* w w w . j a va 2s. c o m*/ private Path getOutputPath(SqoopOptions options, String tableName) { // Get output directory String hdfsWarehouseDir = options.getWarehouseDir(); String hdfsTargetDir = options.getTargetDir(); Path outputPath = null; if (options.isAppendMode()) { // Use temporary path, later removed when appending outputPath = AppendUtils.getTempAppendDir(tableName); LOG.debug("Using temporary folder: " + outputPath.getName()); } else { // Try in this order: target-dir or warehouse-dir if (hdfsTargetDir != null) { outputPath = new Path(hdfsTargetDir); } else if (hdfsWarehouseDir != null) { outputPath = new Path(hdfsWarehouseDir, tableName); } else if (null != tableName) { outputPath = new Path(tableName); } } return outputPath; }
From source file:com.cloudera.sqoop.util.AppendUtils.java
License:Apache License
/** * Moves the imported files from temporary directory to specified target-dir, * renaming partition number if appending file exists. *//* w ww. j a va2 s . co m*/ public void append() throws IOException { SqoopOptions options = context.getOptions(); FileSystem fs = FileSystem.get(options.getConf()); Path tempDir = context.getDestination(); // Try in this order: target-dir or warehouse-dir Path userDestDir = null; if (options.getTargetDir() != null) { userDestDir = new Path(options.getTargetDir()); } else if (options.getWarehouseDir() != null) { userDestDir = new Path(options.getWarehouseDir(), context.getTableName()); } else { userDestDir = new Path(context.getTableName()); } int nextPartition = 0; if (!fs.exists(tempDir)) { // This occurs if there was no source (tmp) dir. This might happen // if the import was an HBase-target import, but the user specified // --append anyway. This is a warning, not an error. LOG.warn("Cannot append files to target dir; no such directory: " + tempDir); return; } // Create target directory. if (!fs.exists(userDestDir)) { LOG.info("Creating missing output directory - " + userDestDir.getName()); fs.mkdirs(userDestDir); nextPartition = 0; } else { LOG.info("Appending to directory " + userDestDir.getName()); // Get the right next partition for the imported files nextPartition = getNextPartition(fs, userDestDir); } // move files moveFiles(fs, tempDir, userDestDir, nextPartition); // delete temporary path LOG.debug("Deleting temporary folder " + tempDir.getName()); fs.delete(tempDir, true); }