Example usage for org.apache.hadoop.fs Path getName

List of usage examples for org.apache.hadoop.fs Path getName

Introduction

In this page you can find the example usage for org.apache.hadoop.fs Path getName.

Prototype

public String getName() 

Source Link

Document

Returns the final component of this path.

Usage

From source file:com.cloudera.science.quince.LoadVariantsTool.java

License:Open Source License

/**
 * Creates a {@link PCollection} of variants from the data under {@code path},
 * choosing the reader by the extension (.avro, .parquet, or .vcf) of the first
 * data file found there.
 *
 * @throws IllegalStateException if the file extension is not recognized
 */
private static PCollection<Variant> readVariants(Path path, Configuration conf, Pipeline pipeline)
        throws IOException {
    Path file = SchemaUtils.findFile(path, conf);
    String fileName = file.getName();
    if (fileName.endsWith(".avro")) {
        return pipeline.read(From.avroFile(path, Avros.specifics(Variant.class)));
    }
    if (fileName.endsWith(".parquet")) {
        @SuppressWarnings("unchecked")
        Source<Variant> source = new AvroParquetFileSource(path, Avros.specifics(Variant.class));
        return pipeline.read(source);
    }
    if (fileName.endsWith(".vcf")) {
        // VCF needs a format-specific input format plus a conversion step to the Avro model.
        TableSource<LongWritable, VariantContextWritable> vcfSource = From.formattedFile(path,
                VCFInputFormat.class, LongWritable.class, VariantContextWritable.class);
        return pipeline.read(vcfSource).parallelDo(new VariantContextToVariantFn(),
                Avros.specifics(Variant.class));
    }
    throw new IllegalStateException("Unrecognized format for " + file);
}

From source file:com.cloudera.science.quince.SchemaUtils.java

License:Open Source License

/**
 * Infers the dataset {@code Format} from the extension of the path's final
 * component (.avro or .parquet).
 *
 * @throws IllegalStateException if the extension is not recognized
 */
public static Format readFormat(Path path) {
    String name = path.getName();
    if (name.endsWith(".avro")) {
        return Formats.AVRO;
    }
    if (name.endsWith(".parquet")) {
        return Formats.PARQUET;
    }
    throw new IllegalStateException("Unrecognized format for " + path);
}

From source file:com.cloudera.science.quince.SchemaUtils.java

License:Open Source License

/**
 * Resolves {@code path} to a concrete data file. If it is a directory, returns
 * the first visible entry (one whose name starts with neither "_" nor ".");
 * otherwise returns the path unchanged.
 */
public static Path findFile(Path path, Configuration conf) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    if (!fs.isDirectory(path)) {
        return path;
    }
    // Skip Hadoop bookkeeping entries such as _SUCCESS, _logs, and hidden files.
    PathFilter visibleFiles = new PathFilter() {
        @Override
        public boolean accept(Path p) {
            String name = p.getName();
            return !(name.startsWith("_") || name.startsWith("."));
        }
    };
    return fs.listStatus(path, visibleFiles)[0].getPath();
}

From source file:com.cloudera.science.quince.VCFToGA4GHVariantFn.java

License:Open Source License

/**
 * Reads the VCF header from each input file, tags each header with its source
 * file name, and stores the serialized headers (and optional sample group) in
 * the job configuration for use by downstream tasks.
 *
 * @param conf        job configuration to populate
 * @param vcfs        VCF files whose headers should be captured
 * @param sampleGroup optional sample group name; skipped when null
 * @throws IOException if a file cannot be opened or read
 */
public static void configureHeaders(Configuration conf, Path[] vcfs, String sampleGroup) throws IOException {
    List<VCFHeader> headers = new ArrayList<>();
    for (Path vcf : vcfs) {
        // try-with-resources: the original leaked this stream on every iteration.
        try (InputStream inputStream = vcf.getFileSystem(conf).open(vcf)) {
            VcfBlockIterator iterator = new VcfBlockIterator(inputStream, new FullVcfCodec());
            VCFHeader header = iterator.getHeader();
            // Record the originating file name so variants can later be mapped
            // back to their variant set.
            header.addMetaDataLine(new VCFHeaderLine(VARIANT_SET_ID, vcf.getName()));
            headers.add(header);
        }
    }
    VCFHeader[] headersArray = headers.toArray(new VCFHeader[headers.size()]);
    // Headers are not Configuration-friendly; serialize and Base64-encode them.
    conf.set(VARIANT_HEADERS, Base64.encodeBase64String(SerializationUtils.serialize(headersArray)));
    if (sampleGroup != null) {
        conf.set(SAMPLE_GROUP, sampleGroup);
    }
}

From source file:com.cloudera.seismic.segy.SegyUnloader.java

License:Open Source License

/**
 * Tool entry point: concatenates the SU records stored in HDFS sequence files
 * under {@code -input} into a single local SU file given by {@code -output}.
 *
 * @return 0 on success (exits the JVM with status 1 on bad arguments or a
 *         missing input path)
 */
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption("input", true, "SU sequence files to export from Hadoop");
    options.addOption("output", true, "The local SU file to write");

    // Parse the command line and check for required arguments.
    CommandLine cmdLine = new PosixParser().parse(options, args, false);
    if (!cmdLine.hasOption("input") || !cmdLine.hasOption("output")) {
        // Fixed typo in the original message ("Mising").
        System.out.println("Missing required input/output arguments");
        new HelpFormatter().printHelp("SegyUnloader", options);
        System.exit(1);
    }

    Configuration conf = getConf();
    FileSystem hdfs = FileSystem.get(conf);
    Path inputPath = new Path(cmdLine.getOptionValue("input"));
    if (!hdfs.exists(inputPath)) {
        System.out.println("Input path does not exist");
        System.exit(1);
    }

    // Skip Hadoop bookkeeping files such as _SUCCESS and _logs.
    PathFilter pf = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return !path.getName().startsWith("_");
        }
    };

    // try-with-resources ensures the local output file is closed even if a
    // write fails part-way through (the original leaked it on exception).
    try (DataOutputStream os = new DataOutputStream(new FileOutputStream(cmdLine.getOptionValue("output")))) {
        for (FileStatus fs : hdfs.listStatus(inputPath, pf)) {
            write(fs.getPath(), os, conf);
        }
    }

    return 0;
}

From source file:com.cloudera.sqoop.TestAppendUtils.java

License:Apache License

/**
 * @return the numeric partition suffix of the file's name, or 0 when the name
 *     contains no partition separator
 */
private int getFilePartition(Path file) {
    String filename = file.getName();
    int sep = filename.lastIndexOf(FILEPART_SEPARATOR);
    if (sep == -1) {
        return 0;
    }
    // The partition number is the fixed-width digit run right after the separator.
    String digits = filename.substring(sep + 1, sep + 1 + PARTITION_DIGITS);
    return Integer.parseInt(digits);
}

From source file:com.cloudera.sqoop.TestExport.java

License:Apache License

/**
 * Export some rows from a SequenceFile, make sure they import correctly.
 *
 * <p>End-to-end flow: generate the record class and jar for the target table
 * via CodeGenTool, reset the table, write TOTAL_RECORDS records into a
 * SequenceFile using the generated class, run the export with that class/jar
 * on the classpath, and verify the exported row count.
 */
public void testSequenceFileExport() throws Exception {

    final int TOTAL_RECORDS = 10;

    // First, generate class and jar files that represent the table
    // we're exporting to.
    LOG.info("Creating initial schema for SeqFile test");
    createTable();
    LOG.info("Generating code...");
    CodeGenTool codeGen = new CodeGenTool();
    String[] codeGenArgs = getCodeGenArgv();
    SqoopOptions options = codeGen.parseArguments(codeGenArgs, null, null, true);
    codeGen.validateOptions(options);
    int ret = codeGen.run(options);
    // Code generation must succeed before anything else can proceed.
    assertEquals(0, ret);
    List<String> generatedJars = codeGen.getGeneratedJarFiles();

    // Now, wipe the created table so we can export on top of it again.
    LOG.info("Resetting schema and data...");
    createTable();

    // Wipe the directory we use when creating files to export to ensure
    // it's ready for new SequenceFiles.
    removeTablePath();

    assertNotNull(generatedJars);
    assertEquals("Expected 1 generated jar file", 1, generatedJars.size());
    String jarFileName = generatedJars.get(0);
    // Sqoop generates jars named "foo.jar"; by default, this should contain a
    // class named 'foo'. Extract the class name.
    Path jarPath = new Path(jarFileName);
    String jarBaseName = jarPath.getName();
    assertTrue(jarBaseName.endsWith(".jar"));
    assertTrue(jarBaseName.length() > ".jar".length());
    String className = jarBaseName.substring(0, jarBaseName.length() - ".jar".length());

    LOG.info("Using jar filename: " + jarFileName);
    LOG.info("Using class name: " + className);

    // Saved so the original class loader can be restored in the finally block.
    ClassLoader prevClassLoader = null;

    try {
        if (null != jarFileName) {
            // Make the generated record class loadable by this thread.
            prevClassLoader = ClassLoaderStack.addJarFile(jarFileName, className);
        }

        // Now use this class and jar name to create a sequence file.
        LOG.info("Writing data to SequenceFiles");
        createSequenceFile(0, TOTAL_RECORDS, className);

        // Now run and verify the export.
        LOG.info("Exporting SequenceFile-based data");
        runExport(getArgv(true, 10, 10, "--class-name", className, "--jar-file", jarFileName));
        verifyExport(TOTAL_RECORDS);
    } finally {
        if (null != prevClassLoader) {
            // Always restore the class loader, even when the export fails.
            ClassLoaderStack.setCurrentClassLoader(prevClassLoader);
        }
    }
}

From source file:com.cloudera.sqoop.TestMerge.java

License:Apache License

/**
 * Return true if there's a file in 'dirName' with a line that starts with
 * 'prefix'. Only "part-" output files under the local warehouse directory are
 * scanned; the test fails outright when no import files exist at all.
 */
protected boolean recordStartsWith(String prefix, String dirName) throws Exception {
    Path targetPath = new Path(new Path(LOCAL_WAREHOUSE_DIR), dirName);
    FileSystem fs = FileSystem.getLocal(new Configuration());
    FileStatus[] files = fs.listStatus(targetPath);

    if (files == null || files.length == 0) {
        fail("Got no import files!");
    }

    for (FileStatus stat : files) {
        Path candidate = stat.getPath();
        if (!candidate.getName().startsWith("part-")) {
            continue;  // ignore _SUCCESS, logs, and other non-data files
        }
        if (checkFileForLine(fs, candidate, prefix)) {
            // Found a matching line; no need to scan further.
            return true;
        }
    }

    return false;
}

From source file:com.cloudera.sqoop.tool.ImportTool.java

License:Apache License

/**
 * @return the output path for the imported files;
 * in append mode this will point to a temporary folder.
 * if importing to hbase, this may return null.
 */
private Path getOutputPath(SqoopOptions options, String tableName) {
    if (options.isAppendMode()) {
        // Append mode stages the import in a temp dir that is merged later.
        Path tempDir = AppendUtils.getTempAppendDir(tableName);
        LOG.debug("Using temporary folder: " + tempDir.getName());
        return tempDir;
    }

    // Otherwise resolve in order: target-dir, warehouse-dir/table, table name.
    String hdfsTargetDir = options.getTargetDir();
    if (hdfsTargetDir != null) {
        return new Path(hdfsTargetDir);
    }
    String hdfsWarehouseDir = options.getWarehouseDir();
    if (hdfsWarehouseDir != null) {
        return new Path(hdfsWarehouseDir, tableName);
    }
    if (tableName != null) {
        return new Path(tableName);
    }
    return null;
}

From source file:com.cloudera.sqoop.util.AppendUtils.java

License:Apache License

/**
 * Moves the imported files from temporary directory to specified target-dir,
 * renaming partition number if appending file exists.
 */
public void append() throws IOException {

    SqoopOptions options = context.getOptions();
    FileSystem fs = FileSystem.get(options.getConf());
    Path tempDir = context.getDestination();

    // Destination resolution order: target-dir, warehouse-dir/table, table name.
    Path userDestDir;
    if (options.getTargetDir() != null) {
        userDestDir = new Path(options.getTargetDir());
    } else if (options.getWarehouseDir() != null) {
        userDestDir = new Path(options.getWarehouseDir(), context.getTableName());
    } else {
        userDestDir = new Path(context.getTableName());
    }

    if (!fs.exists(tempDir)) {
        // This occurs if there was no source (tmp) dir. This might happen
        // if the import was an HBase-target import, but the user specified
        // --append anyway. This is a warning, not an error.
        LOG.warn("Cannot append files to target dir; no such directory: " + tempDir);
        return;
    }

    int nextPartition;
    if (fs.exists(userDestDir)) {
        LOG.info("Appending to directory " + userDestDir.getName());
        // Continue numbering after the highest existing partition.
        nextPartition = getNextPartition(fs, userDestDir);
    } else {
        LOG.info("Creating missing output directory - " + userDestDir.getName());
        fs.mkdirs(userDestDir);
        nextPartition = 0;
    }

    // Move the staged files into place, then discard the staging directory.
    moveFiles(fs, tempDir, userDestDir, nextPartition);
    LOG.debug("Deleting temporary folder " + tempDir.getName());
    fs.delete(tempDir, true);
}