Example usage for org.apache.hadoop.fs Path getName

List of usage examples for org.apache.hadoop.fs Path getName

Introduction

On this page you can find example usages of org.apache.hadoop.fs.Path#getName().

Prototype

public String getName() 

Document

Returns the final component of this path.
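
Before the project examples below, here is a minimal, self-contained sketch (not drawn from any of the listed projects; the sample paths are made up) showing what getName() returns:

import org.apache.hadoop.fs.Path;

public class PathGetNameDemo {
    public static void main(String[] args) {
        // getName() returns only the final path component; parent directories and the scheme are stripped.
        System.out.println(new Path("/user/hive/warehouse/events.parquet").getName()); // events.parquet
        System.out.println(new Path("hdfs://namenode:8020/tmp/part-00000.gz").getName()); // part-00000.gz
        // For a directory path, it is the last directory name (trailing slashes are normalized away).
        System.out.println(new Path("/data/2024/01/").getName()); // 01
    }
}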

Usage

From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.HDFSUnsortedHoplogOrganizer.java

License:Apache License

private FileStatus[] getExpiredHoplogs() throws IOException {
    FileStatus files[] = FSUtils.listStatus(fileSystem, bucketPath, new PathFilter() {
        @Override
        public boolean accept(Path file) {
            // All expired hoplogs end with the expired extension and must match the valid file regex
            String fileName = file.getName();
            return fileName.endsWith(EXPIRED_HOPLOG_EXTENSION);
        }
    });
    return files;
}

From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.HDFSUnsortedHoplogOrganizer.java

License:Apache License

/**
 * Locks the sorted oplog collection, removes the oplogs, and renames them for later deletion.
 * @throws IOException
 */
private void markHoplogsForDeletion() throws IOException {

    ArrayList<IOException> errors = new ArrayList<IOException>();
    FileStatus validHoplogs[] = FSUtils.listStatus(fileSystem, bucketPath, new PathFilter() {
        @Override
        public boolean accept(Path file) {
            // All valid hoplog files must match the regex
            Matcher matcher = HOPLOG_PATTERN.matcher(file.getName());
            return matcher.matches();
        }
    });

    FileStatus[] expired = getExpiredHoplogs();
    validHoplogs = filterValidHoplogs(validHoplogs, expired);

    if (validHoplogs == null || validHoplogs.length == 0) {
        return;
    }
    for (FileStatus fileStatus : validHoplogs) {
        try {
            addExpiryMarkerForAFile(getHoplog(fileStatus.getPath()));
        } catch (IOException e) {
            // even if there is an IO error continue removing other hoplogs and
            // notify at the end
            errors.add(e);
        }
    }

    if (!errors.isEmpty()) {
        for (IOException e : errors) {
            logger.warn(LocalizedStrings.HOPLOG_HOPLOG_REMOVE_FAILED, e);
        }
    }
}

From source file:com.github.dryangkun.hbase.tidx.hive.HiveHFileOutputFormat.java

License:Apache License

@Override
public RecordWriter getHiveRecordWriter(final JobConf jc, final Path finalOutPath,
        Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
        final Progressable progressable) throws IOException {

    // Read configuration for the target path, first from jobconf, then from table properties
    String hfilePath = getFamilyPath(jc, tableProperties);
    if (hfilePath == null) {
        throw new RuntimeException("Please set " + HFILE_FAMILY_PATH + " to target location for HFiles");
    }

    // Target path's last component is also the column family name.
    final Path columnFamilyPath = new Path(hfilePath);
    final String columnFamilyName = columnFamilyPath.getName();
    final byte[] columnFamilyNameBytes = Bytes.toBytes(columnFamilyName);
    final Job job = new Job(jc);
    setCompressOutput(job, isCompressed);
    setOutputPath(job, finalOutPath);

    // Create the HFile writer
    final org.apache.hadoop.mapreduce.TaskAttemptContext tac = ShimLoader.getHadoopShims()
            .newTaskAttemptContext(job.getConfiguration(), progressable);

    final Path outputdir = FileOutputFormat.getOutputPath(tac);
    final org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable, KeyValue> fileWriter = getFileWriter(
            tac);

    // Individual columns are going to be pivoted to HBase cells,
    // and for each row, they need to be written out in order
    // of column name, so sort the column names now, creating a
    // mapping to their column position.  However, the first
    // column is interpreted as the row key.
    String columnList = tableProperties.getProperty("columns");
    String[] columnArray = columnList.split(",");
    final SortedMap<byte[], Integer> columnMap = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
    int i = 0;
    for (String columnName : columnArray) {
        if (i != 0) {
            columnMap.put(Bytes.toBytes(columnName), i);
        }
        ++i;
    }

    return new RecordWriter() {

        @Override
        public void close(boolean abort) throws IOException {
            try {
                fileWriter.close(null);
                if (abort) {
                    return;
                }
                // Move the hfiles file(s) from the task output directory to the
                // location specified by the user.
                FileSystem fs = outputdir.getFileSystem(jc);
                fs.mkdirs(columnFamilyPath);
                Path srcDir = outputdir;
                for (;;) {
                    FileStatus[] files = fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER);
                    if ((files == null) || (files.length == 0)) {
                        throw new IOException("No family directories found in " + srcDir);
                    }
                    if (files.length != 1) {
                        throw new IOException("Multiple family directories found in " + srcDir);
                    }
                    srcDir = files[0].getPath();
                    if (srcDir.getName().equals(columnFamilyName)) {
                        break;
                    }
                }
                for (FileStatus regionFile : fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER)) {
                    fs.rename(regionFile.getPath(), new Path(columnFamilyPath, regionFile.getPath().getName()));
                }
                // Hive actually wants a file as task output (not a directory), so
                // replace the empty directory with an empty file to keep it happy.
                fs.delete(outputdir, true);
                fs.createNewFile(outputdir);
            } catch (InterruptedException ex) {
                throw new IOException(ex);
            }
        }

        private void writeText(Text text) throws IOException {
            // Decompose the incoming text row into fields.
            String s = text.toString();
            String[] fields = s.split("\u0001");
            assert (fields.length <= (columnMap.size() + 1));
            // First field is the row key.
            byte[] rowKeyBytes = Bytes.toBytes(fields[0]);
            // Remaining fields are cells addressed by column name within row.
            for (Map.Entry<byte[], Integer> entry : columnMap.entrySet()) {
                byte[] columnNameBytes = entry.getKey();
                int iColumn = entry.getValue();
                String val;
                if (iColumn >= fields.length) {
                    // trailing blank field
                    val = "";
                } else {
                    val = fields[iColumn];
                    if ("\\N".equals(val)) {
                        // omit nulls
                        continue;
                    }
                }
                byte[] valBytes = Bytes.toBytes(val);
                KeyValue kv = new KeyValue(rowKeyBytes, columnFamilyNameBytes, columnNameBytes, valBytes);
                try {
                    fileWriter.write(null, kv);
                } catch (IOException e) {
                    LOG.error("Failed while writing row: " + s);
                    throw e;
                } catch (InterruptedException ex) {
                    throw new IOException(ex);
                }
            }
        }

        private void writePut(PutWritable put) throws IOException {
            ImmutableBytesWritable row = new ImmutableBytesWritable(put.getPut().getRow());
            SortedMap<byte[], List<Cell>> cells = put.getPut().getFamilyCellMap();
            for (Map.Entry<byte[], List<Cell>> entry : cells.entrySet()) {
                Collections.sort(entry.getValue(), new CellComparator());
                for (Cell c : entry.getValue()) {
                    try {
                        fileWriter.write(row, KeyValueUtil.copyToNewKeyValue(c));
                    } catch (InterruptedException e) {
                        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
                    }
                }
            }
        }

        @Override
        public void write(Writable w) throws IOException {
            if (w instanceof Text) {
                writeText((Text) w);
            } else if (w instanceof PutWritable) {
                writePut((PutWritable) w);
            } else {
                throw new IOException("Unexpected writable " + w);
            }
        }
    };
}

From source file:com.github.sadikovi.riff.Riff.java

License:Open Source License

/**
 * Infer compression codec from file name.
 * @param path path to the file
 * @return compression codec or null for uncompressed
 */
protected static CompressionCodec inferCompressionCodec(Path path) {
    String name = path.getName();
    int start = name.lastIndexOf('.');
    String ext = (start <= 0) ? "" : name.substring(start);
    return CompressionCodecFactory.forFileExt(ext);
}
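
As an illustration only (not part of the Riff source; the file name is made up), the getName()-based extension handling above behaves like this:

import org.apache.hadoop.fs.Path;

public class ExtensionDemo {
    public static void main(String[] args) {
        String name = new Path("/data/table/part-00000.deflate").getName(); // "part-00000.deflate"
        int start = name.lastIndexOf('.');
        // start <= 0 covers names without a dot as well as dot-files such as ".crc"
        String ext = (start <= 0) ? "" : name.substring(start);
        System.out.println(ext); // prints ".deflate"
    }
}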

From source file:com.github.sakserv.minicluster.yarn.InJvmContainerExecutor.java

License:Apache License

/**
 * YARN provides the ability to pass resources (e.g., classpath entries) through
 * {@link LocalResource}s, which allows the user to provision all the resources
 * required to run the app. This method extracts those resources as a
 * {@link Set} of {@link URL}s so they can be used when the {@link ClassLoader} for a
 * container is created.
 *
 * This is done primarily as a convenience for applications that rely on
 * automatic classpath propagation (e.g., pull everything from my dev
 * classpath) instead of manual configuration.
 *
 * @param container
 * @return
 */
private Set<URL> filterAndBuildUserClasspath(Container container) {
    if (logger.isDebugEnabled()) {
        logger.debug("Building additional classpath for the container: " + container);
    }
    Set<URL> additionalClassPathUrls = new HashSet<URL>();
    Set<Path> userClassPath = this.extractUserProvidedClassPathEntries(container);

    for (Path resourcePath : userClassPath) {
        String resourceName = "file:///" + new File(resourcePath.getName()).getAbsolutePath();
        if (logger.isDebugEnabled()) {
            logger.debug("\t adding " + resourceName + " to the classpath");
        }
        try {
            additionalClassPathUrls.add(new URI(resourceName).toURL());
        } catch (Exception e) {
            throw new IllegalArgumentException(e);
        }
    }
    return additionalClassPathUrls;
}

From source file:com.github.xiaofu.demo.parquet.MyConvertUtils.java

License:Apache License

public static void convertCsvToParquet(File csvFile, Path outputParquetFile, boolean enableDictionary)
        throws IOException {
    LOG.info("Converting " + csvFile.getName() + " to " + outputParquetFile.getName());
    String rawSchema = getSchema(csvFile);

    MessageType schema = MessageTypeParser.parseMessageType(rawSchema);
    MyCsvParquetWriter writer = new MyCsvParquetWriter(outputParquetFile, schema, CompressionCodecName.GZIP,
            BLOCK_SIZE, PAGE_SIZE, enableDictionary);

    BufferedReader br = new BufferedReader(new FileReader(csvFile));
    String line;
    int lineNumber = 0;
    try {
        while ((line = br.readLine()) != null) {
            String[] fields = line.split(Pattern.quote(CSV_DELIMITER));
            writer.write(Arrays.asList(fields));
            ++lineNumber;
        }

        writer.close();
    } finally {
        LOG.info("Number of lines: " + lineNumber);
        Utils.closeQuietly(br);
    }
}
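
A hypothetical invocation of the converter above (the paths are made-up examples) could look like this:

import java.io.File;
import org.apache.hadoop.fs.Path;
import com.github.xiaofu.demo.parquet.MyConvertUtils;

public class ConvertDemo {
    public static void main(String[] args) throws Exception {
        // Converts a local CSV file to a GZIP-compressed Parquet file, with dictionary encoding enabled.
        MyConvertUtils.convertCsvToParquet(
                new File("/tmp/users.csv"),
                new Path("/tmp/users.parquet"),
                true);
    }
}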

From source file:com.github.xiaofu.demo.parquet.MyConvertUtils.java

License:Apache License

public static void convertParquetToCSV(Path parquetFilePath, File csvOutputFile) throws IOException {
    // Preconditions.checkArgument(parquetFilePath.getName().endsWith(".parquet"),
    // "parquet file should have .parquet extension");
    Preconditions.checkArgument(csvOutputFile.getName().endsWith(".csv"),
            "csv file should have .csv extension");
    // Preconditions.checkArgument(!csvOutputFile.exists(),
    // "Output file " + csvOutputFile.getAbsolutePath() +
    // " already exists");

    LOG.info("Converting " + parquetFilePath.getName() + " to " + csvOutputFile.getName());

    Configuration configuration = new Configuration(true);

    GroupReadSupport readSupport = new GroupReadSupport();
    ParquetMetadata readFooter = ParquetFileReader.readFooter(configuration, parquetFilePath);
    MessageType schema = readFooter.getFileMetaData().getSchema();

    readSupport.init(configuration, null, schema);
    BufferedWriter w = new BufferedWriter(new FileWriter(csvOutputFile));
    ParquetReader<Group> reader = new ParquetReader<Group>(parquetFilePath, readSupport);
    try {
        Group g = null;
        while ((g = reader.read()) != null) {
            writeGroup(w, g, schema);
        }
        reader.close();
    } finally {
        Utils.closeQuietly(w);
    }
}

From source file:com.github.xiaofu.demo.parquet.MyConvertUtils.java

License:Apache License

public static void convertParquetToCSV_With_Specified_Columns_And_Column_Filter(Path parquetFilePath,
        File csvOutputFile) throws IOException {
    // Preconditions.checkArgument(parquetFilePath.getName().endsWith(".parquet"),
    // "parquet file should have .parquet extension");
    Preconditions.checkArgument(csvOutputFile.getName().endsWith(".csv"),
            "csv file should have .csv extension");
    // Preconditions.checkArgument(!csvOutputFile.exists(),
    // "Output file " + csvOutputFile.getAbsolutePath() +
    // " already exists");

    LOG.info("Converting " + parquetFilePath.getName() + " to " + csvOutputFile.getName());
    // Project only the user_id column by setting the read schema on the configuration
    Configuration configuration = new Configuration(true);
    configuration.set(ReadSupport.PARQUET_READ_SCHEMA, "message m { optional binary user_id;}");
    GroupReadSupport readSupport = new GroupReadSupport();
    /*ParquetMetadata readFooter = ParquetFileReader.readFooter(
    configuration, parquetFilePath);*/
    //MessageType fileSchema = readFooter.getFileMetaData().getSchema();
    MessageType requestSchema = MessageTypeParser.parseMessageType("message m { optional binary user_id;}");
    //readSupport.init(configuration, null, fileSchema);
    BufferedWriter w = new BufferedWriter(new FileWriter(csvOutputFile));
    ParquetReader<Group> reader = new ParquetReader<Group>(configuration, parquetFilePath, readSupport,
            ColumnRecordFilter.column("user_id", new Predicate() {

                @Override
                public boolean apply(ColumnReader input) {
                    String temp = input.getBinary().toStringUsingUTF8();
                    return !temp.equals("0") && !temp.isEmpty();
                }
            }));
    try {
        Group g = null;
        while ((g = reader.read()) != null) {
            writeGroup(w, g, requestSchema);
        }
        reader.close();
    } finally {
        Utils.closeQuietly(w);
    }
}

From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemTestBase.java

License:Open Source License

/**
 * We override certain methods in FileSystem simply to provide debug tracing. (Search for
 * "Overridden functions for debug tracing" in GoogleHadoopFileSystemBase.java).
 * We do not add or update any functionality for such methods. The following
 * tests simply exercise that path to ensure coverage. Consequently, they do not
 * really test any functionality.
 *
 * Having coverage for these methods lets us easily determine the amount of
 * coverage that is missing in the rest of the code.
 */
@Test
public void provideCoverageForUnmodifiedMethods() throws IOException {
    // -------------------------------------------------------
    // Create test data.

    // Temporary file in GHFS.
    URI tempFileUri = GoogleCloudStorageFileSystemIntegrationTest.getTempFilePath();
    Path tempFilePath = ghfsHelper.castAsHadoopPath(tempFileUri);
    Path tempDirPath = tempFilePath.getParent();
    String text = "Hello World!";
    ghfsHelper.writeFile(tempFilePath, text, 1, false);

    // Another temporary file in GHFS.
    URI tempFileUri2 = GoogleCloudStorageFileSystemIntegrationTest.getTempFilePath();
    Path tempFilePath2 = ghfsHelper.castAsHadoopPath(tempFileUri2);

    // Temporary file in local FS.
    File localTempFile = File.createTempFile("ghfs-test-", null);
    Path localTempFilePath = new Path(localTempFile.getPath());
    Path localTempDirPath = localTempFilePath.getParent();

    // -------------------------------------------------------
    // Call methods to provide coverage for. Note that we do not attempt to
    // test their functionality as we are not testing Hadoop engine here.
    try {
        ghfs.deleteOnExit(tempFilePath);
        ghfs.getContentSummary(tempFilePath);
        ghfs.getDelegationToken("foo");
        ghfs.copyFromLocalFile(false, true, localTempFilePath, tempDirPath);
        ghfs.copyFromLocalFile(false, true, new Path[] { localTempFilePath }, tempDirPath);
        localTempFile.delete();
        ghfs.copyToLocalFile(true, tempFilePath, localTempDirPath);
        File localCopiedFile = new File(localTempDirPath.toString(), tempFilePath.getName());
        localCopiedFile.delete();
        Path localOutputPath = ghfs.startLocalOutput(tempFilePath2, localTempFilePath);
        FileWriter writer = new FileWriter(localOutputPath.toString());
        writer.write(text);
        writer.close();
        ghfs.completeLocalOutput(tempFilePath2, localOutputPath);
        ghfs.getUsed();
        ghfs.setVerifyChecksum(false);
        ghfs.getFileChecksum(tempFilePath2);
        ghfs.setPermission(tempFilePath2, FsPermission.getDefault());
        try {
            ghfs.setOwner(tempFilePath2, "foo-user", "foo-group");
        } catch (IOException ioe) {
            // Some filesystems (like the LocalFileSystem) are strict about existence of owners.
            // TODO(user): Abstract out the behaviors around owners/permissions and properly test
            // the different behaviors between different filesystems.
        }
        ghfs.setTimes(tempFilePath2, 0, 0);
    } finally {
        // We do not need to separately delete the temp files created in GHFS because
        // we delete all test buckets recursively at the end of the tests.
        if (localTempFile.exists()) {
            localTempFile.delete();
        }
    }
}

From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopSyncableOutputStream.java

License:Open Source License

/**
 * Returns URI to be used for the next "tail" file in the series.
 */
private URI getNextTemporaryPath() {
    Path basePath = ghfs.getHadoopPath(finalGcsPath);
    Path baseDir = basePath.getParent();
    Path tempPath = new Path(baseDir, String.format("%s%s.%d.%s", TEMPFILE_PREFIX, basePath.getName(),
            curComponentIndex, UUID.randomUUID().toString()));
    return ghfs.getGcsPath(tempPath);
}