Example usage for org.apache.hadoop.fs FileSystem getContentSummary

List of usage examples for org.apache.hadoop.fs FileSystem getContentSummary

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem getContentSummary.

Prototype

public ContentSummary getContentSummary(Path f) throws IOException 

Document

Return the ContentSummary of a given Path.
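
For quick reference, here is a minimal, self-contained sketch (the path /data/logs is a placeholder) that obtains a FileSystem from a default Configuration, calls getContentSummary, and prints the most commonly used fields of the result:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ContentSummaryExample {
    public static void main(String[] args) throws IOException {
        // Placeholder path; point this at a file or directory that exists on your cluster.
        Path target = new Path("/data/logs");

        FileSystem fs = FileSystem.get(new Configuration());
        ContentSummary summary = fs.getContentSummary(target);

        // Total length of all files under the path (without replication), in bytes.
        System.out.println("length:      " + summary.getLength());
        // Number of files and directories under the path.
        System.out.println("files:       " + summary.getFileCount());
        System.out.println("directories: " + summary.getDirectoryCount());
        // Space consumed including replication, in bytes.
        System.out.println("spaceUsed:   " + summary.getSpaceConsumed());
    }
}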

Usage

From source file:com.github.seqware.queryengine.tutorial.Poster.java

License:Open Source License

private void recordSpace(String key) throws IOException {
    try {
        Configuration conf = new Configuration();
        HBaseStorage.configureHBaseConfig(conf);
        HBaseConfiguration.addHbaseResources(conf);
        FileSystem fs = FileSystem.get(conf);
        Path homeDirectory = fs.getHomeDirectory();
        Path root = homeDirectory.getParent().getParent();
        Path hbase = new Path(root, "hbase");
        ContentSummary contentSummary = fs.getContentSummary(hbase);
        long spaceConsumedinGB = convertToGB(contentSummary);
        keyValues.put(key + "-total-space-in-GB", Long.toString(spaceConsumedinGB));

        /**
         *
         * if (spaceConsumedinGB > CUT_OFF){ return; }
         *
         */
        Path featureTable = new Path(hbase,
                Constants.Term.NAMESPACE.getTermValue(String.class) + ".hbaseTestTable_v2.Feature." + HG_19);
        contentSummary = fs.getContentSummary(featureTable);
        spaceConsumedinGB = convertToGB(contentSummary);
        keyValues.put(key + "-feature-space-in-GB", Long.toString(spaceConsumedinGB));
    } catch (FileNotFoundException e) {
        // throw away, this is ok the first time
    }

}

From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.opt.OptimizerRuleBased.java

License:Open Source License

/**
 * Increasing the partition replication factor is beneficial if partitions are
 * read multiple times (e.g., in nested loops) because partitioning (done once)
 * gets slightly slower but there is a higher probability for local access
 *
 * NOTE: this rewrite requires 'set data partitioner' to be executed in order to
 * leverage the partitioning information in the plan tree. 
 *  
 * @param n
 * @throws DMLRuntimeException 
 */
protected void rewriteSetPartitionReplicationFactor(OptNode n,
        HashMap<String, PDataPartitionFormat> partitionedMatrices, LocalVariableMap vars)
        throws DMLRuntimeException {
    boolean apply = false;
    double sizeReplicated = 0;
    int replication = ParForProgramBlock.WRITE_REPLICATION_FACTOR;

    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping()
            .getMappedProg(n.getID())[1];

    if (n.getExecType() == ExecType.MR
            && n.getParam(ParamType.DATA_PARTITIONER).equals(PDataPartitioner.REMOTE_MR.toString())
            && n.hasNestedParallelism(false) && n.hasNestedPartitionReads(false)) {
        apply = true;

        //account for problem and cluster constraints
        replication = (int) Math.min(_N, _rnk);

        //account for internal max constraint (note hadoop will warn if max > 10)
        replication = (int) Math.min(replication, MAX_REPLICATION_FACTOR_EXPORT);

        //account for remaining hdfs capacity
        try {
            FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
            long hdfsCapacityRemain = fs.getStatus().getRemaining();
            long sizeInputs = 0; //sum of all input sizes (w/o replication)
            for (String var : partitionedMatrices.keySet()) {
                MatrixObject mo = (MatrixObject) vars.get(var);
                Path fname = new Path(mo.getFileName());
                if (fs.exists(fname)) //non-existing (e.g., CP) -> small file
                    sizeInputs += fs.getContentSummary(fname).getLength();
            }
            replication = (int) Math.min(replication, Math.floor(0.9 * hdfsCapacityRemain / sizeInputs));

            //ensure at least replication 1
            replication = Math.max(replication, ParForProgramBlock.WRITE_REPLICATION_FACTOR);
            sizeReplicated = replication * sizeInputs;
        } catch (Exception ex) {
            throw new DMLRuntimeException("Failed to analyze remaining hdfs capacity.", ex);
        }
    }

    //modify the runtime plan 
    if (apply)
        pfpb.setPartitionReplicationFactor(replication);

    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'set partition replication factor' - result=" + apply
            + ((apply) ? " (" + replication + ", " + toMB(sizeReplicated) + ")" : ""));
}
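
For readers interested only in the capacity check above, here is a minimal, hypothetical helper (not part of SystemML) that distills the idea: sum the input sizes via getContentSummary and cap a desired replication factor so that the replicated inputs stay within roughly 90% of the remaining HDFS capacity.

private static int capReplicationByCapacity(FileSystem fs, Path[] inputs, int desired) throws IOException {
    long remaining = fs.getStatus().getRemaining(); // remaining HDFS capacity in bytes
    long totalSize = 0; // sum of all input sizes (without replication)
    for (Path p : inputs)
        if (fs.exists(p))
            totalSize += fs.getContentSummary(p).getLength();
    if (totalSize == 0)
        return desired; // nothing to replicate, keep the requested factor
    // cap by remaining capacity, but never drop below a replication factor of 1
    return (int) Math.max(1, Math.min(desired, (long) Math.floor(0.9 * remaining / totalSize)));
}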

From source file:com.ibm.bi.dml.runtime.util.MapReduceTool.java

License:Open Source License

/**
 * Returns the size of a file or directory on HDFS in bytes.
 *
 * @param path file or directory path on HDFS
 * @return size in bytes
 * @throws IOException
 */
public static long getFilesizeOnHDFS(Path path) throws IOException {
    FileSystem fs = FileSystem.get(_rJob);
    long ret = 0; //in bytes
    if (fs.isDirectory(path))
        ret = fs.getContentSummary(path).getLength();
    else
        ret = fs.getFileStatus(path).getLen();
    //note: filestatus would return 0 on directories

    return ret;
}
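
Note that getContentSummary also works for plain files (the summary length equals the file length), so a shorter variant could skip the directory check entirely. A minimal sketch, assuming a Configuration is passed in instead of the class's cached _rJob job conf:

public static long getFilesizeOnHDFS(Configuration conf, Path path) throws IOException {
    // ContentSummary.getLength() returns the file length for a plain file and the
    // recursive total of all file lengths for a directory, so no branching is needed.
    return FileSystem.get(conf).getContentSummary(path).getLength();
}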

From source file:com.jkoolcloud.tnt4j.streams.inputs.HdfsFileLineStream.java

License:Apache License

private static int[] getFilesTotals(FileSystem fs, Path[] activityFiles) {
    int tbc = 0; // total byte count
    int tlc = 0; // total line count
    if (ArrayUtils.isNotEmpty(activityFiles)) {
        for (Path f : activityFiles) {
            try {
                ContentSummary cSummary = fs.getContentSummary(f);
                tbc += cSummary.getLength();
                tlc += Utils.countLines(fs.open(f));
            } catch (IOException exc) {
                // skip files that cannot be read; they do not contribute to the totals
            }
        }
    }

    return new int[] { tbc, tlc };
}

From source file:com.jkoolcloud.tnt4j.streams.inputs.HdfsFileLineStreamTest.java

License:Apache License

@Test()
public void test() throws Exception {
    FileSystem fs = mock(FileSystem.class);
    HdfsFileLineStream stream = new HdfsFileLineStream();

    TestFileList files = new TestFileList(false);

    final String fileName = ("file:////" + files.get(0).getParentFile() + File.separator + files.getPrefix() // NON-NLS
            + "*.TST").replace("\\", "/"); // NON-NLS

    Map<String, String> props = new HashMap<>(2);
    props.put(StreamProperties.PROP_FILENAME, fileName);
    props.put(StreamProperties.PROP_RESTORE_STATE, "false"); // NON-NLS

    when(fs.open(any(Path.class))).thenReturn(new FSDataInputStream(new TestInputStreamStub()));
    final FileStatus fileStatusMock = mock(FileStatus.class);
    final FileStatus[] array = new FileStatus[10];
    Arrays.fill(array, fileStatusMock);
    when(fs.listStatus(any(Path.class), any(PathFilter.class))).thenReturn(array);
    when(fileStatusMock.getModificationTime()).thenReturn(1L, 2L, 3L);
    when(fileStatusMock.getPath()).thenReturn(mock(Path.class));
    when(fs.getContentSummary(any(Path.class))).thenReturn(mock(ContentSummary.class));

    Method m = FileSystem.class.getDeclaredMethod("addFileSystemForTesting", URI.class, Configuration.class, // NON-NLS
            FileSystem.class);
    m.setAccessible(true);
    m.invoke(FileSystem.class, URI.create(fileName), new Configuration(), fs);

    StreamThread st = mock(StreamThread.class);
    st.setName("HdfsFileLineStreamTestThreadName"); // NON-NLS
    stream.setOwnerThread(st);

    stream.setProperties(props.entrySet());
    stream.startStream();

    verify(fileStatusMock, atLeastOnce()).getModificationTime();
    verify(fileStatusMock, atLeastOnce()).getPath();
    verify(fs, atLeastOnce()).listStatus(any(Path.class), any(PathFilter.class));

    stream.cleanup();
}

From source file:com.mellanox.r4h.DistributedFileSystem.java

License:Apache License

@Override
public ContentSummary getContentSummary(Path f) throws IOException {
    statistics.incrementReadOps(1);
    Path absF = fixRelativePart(f);
    return new FileSystemLinkResolver<ContentSummary>() {
        @Override
        public ContentSummary doCall(final Path p) throws IOException, UnresolvedLinkException {
            return dfs.getContentSummary(getPathName(p));
        }

        @Override
        public ContentSummary next(final FileSystem fs, final Path p) throws IOException {
            return fs.getContentSummary(p);
        }
    }.resolve(this, absF);
}

From source file:com.rim.logdriver.util.IndexLogs.java

License:Apache License

private static void updateComponent(Map<String, Map<String, Map<String, Map<String, Component>>>> data,
        FileSystem fs, FileStatus matchedFolder, Path path) throws IOException, ParseException {
    // Parse path by splitting it across slashes. To determine service (which might contain slashes) grab
    // everything after the DC name, but before the matched date string.
    String[] pathPieces = matchedFolder.getPath().toString().split("/");
    String[] servicePieces = path.toString().split(pathPieces[4] + "/");
    servicePieces = servicePieces[1].split("/" + pathPieces[pathPieces.length - 5]);
    String DC = pathPieces[4];
    String service = servicePieces[0];
    String component = pathPieces[pathPieces.length - 2];
    String type = pathPieces[pathPieces.length - 5];
    String status = pathPieces[pathPieces.length - 1];
    Date date = inputFormat.parse(pathPieces[pathPieces.length - 4]);

    // Check if there is a matching component, create one if not. 
    if (!componentExists(data, DC, service, type, component)) {
        data.get(DC).get(service).get(type).put(component, new Component(DC, service, type, component, date));
    }

    Component thisComponent = data.get(DC).get(service).get(type).get(component);

    // Update the start or end date if the current date is before or after, respectively. 
    if (date.before(thisComponent.startDate)) {
        thisComponent.startDate = date;
    } else if (date.after(thisComponent.endDate)) {
        thisComponent.endDate = date;
    }

    // Is the current folder an archive? If so and date is later than the current archiveDate, update it. 
    if (status.matches("archive") && date.after(thisComponent.archiveDate)) {
        thisComponent.archiveDate = date;
    }

    // Add size data
    if (status.matches("data")) {
        thisComponent.addDataSize(fs.getContentSummary(matchedFolder.getPath()).getLength());
    } else if (status.matches("incoming")) {
        thisComponent.addIncomingSize(fs.getContentSummary(matchedFolder.getPath()).getLength());
    } else if (status.matches("archive")) {
        thisComponent.addArchiveSize(fs.getContentSummary(matchedFolder.getPath()).getLength());
    }
}

From source file:com.rim.logdriver.util.LogStats.java

License:Apache License

public static double[] getDataOverTime(FileSystem fs, Component component, Date startDate, Date endDate) {

    if (startDate.after(component.endDate) || endDate.before(component.startDate)) {
        return new double[0];
    }

    // If the date range specified overlaps archived data, notify the user 
    if (startDate.before(component.archiveDate)) {
        System.out.println("Warning: Time range specified includes archived data");
    }

    // Set up the index and the per-hour result array (one entry per hour in the range).
    long totalHours = (endDate.getTime() - startDate.getTime()) / oneHour;
    int logVolumesIndex = 0;
    double[] logVolumes = new double[(int) totalHours];
    String basePath = "/service/" + component.DC + "/" + component.service + "/" + component.type + "/";

    for (Long currentDate = startDate.getTime(); currentDate < endDate.getTime(); currentDate += oneHour) {
        String dateAndHour = inputFormat.format(new Date(currentDate)) + "/"
                + String.format("%02d", new Date(currentDate).getHours()) + "/";
        Path path = new Path(basePath + dateAndHour + component.component);
        if (component.startDate.getTime() - oneDay < currentDate
                && component.endDate.getTime() + oneDay > currentDate) {
            try {
                logVolumes[logVolumesIndex] = fs.getContentSummary(path).getLength();
            } catch (IOException e) {
                logVolumes[logVolumesIndex] = 0;
            }
        } else {
            logVolumes[logVolumesIndex] = 0;
        }
        logVolumesIndex++;
    }
    return logVolumes;
}

From source file:com.splicemachine.derby.impl.io.HdfsDirFile.java

License:Apache License

@Override
public long length() {
    if (!exists() || isDirectory()) {
        return 0;
    } // As specified in the Javadoc.
    try {
        FileSystem fs = getFileSystem();
        return fs.getContentSummary(new Path(path)).getLength();
    } catch (IOException e) {
        LOG.error(String.format("An exception occurred while getting the size of the file '%s'.", path), e);
        return 0;
    }
}

From source file:com.tripadvisor.hadoop.BackupHdfs.java

License:Apache License

/**
 * Method to move files from HDFS to the local filesystem.
 *
 * localPath: path on the machine's local filesystem
 * preservePath: path under which existing local copies are preserved
 * fs: FileSystem object for HDFS
 * pathList: list of paths for files that might need to be backed up
 * size: max size in bytes to be backed up
 *
 * Returns: date of the last file backed up if the size limit was reached,
 * else zero.
 **/
public long backupFiles(String localPath, String preservePath, FileSystem fs, ArrayList<Path> pathList,
        long size) {
    Path fsPath;
    long tmpSize = 0;
    long tmpDate = 0;

    // Start iterating over all paths
    for (Path hdfsPath : pathList) {
        try {
            long nFileSize = fs.getContentSummary(hdfsPath).getLength();
            tmpSize = tmpSize + nFileSize;

            if ((tmpSize <= size) || (size == 0)) {
                FileStatus stat = fs.getFileStatus(hdfsPath);

                System.err.println("File " + hdfsPath.toUri().getPath() + " " + nFileSize + " bytes, "
                        + "perms: " + stat.getOwner() + "/" + stat.getGroup() + ", "
                        + stat.getPermission().toString());

                tmpDate = stat.getModificationTime() / 1000;

                String sFsPath = localPath + hdfsPath.toUri().getPath();
                fsPath = new Path(sFsPath);

                File f = new File(sFsPath);

                // COMMENTED OUT: until a few backup cycles run
                // and the mtime gets in fact set on all copied
                // files.
                //
                // ignore it if the file exists and has the same mtime
                // if (f.exists() && f.isFile() && f.lastModified() == stat.getModificationTime())
                // {
                // System.out.println("no need to backup " + f.toString() + ", mtime matches hdfs");
                // continue;
                // }

                if (false == m_bDryRun) {
                    // check if we need to back up the local file
                    // (not directory), if it already exists.
                    if (f.exists() && f.isFile()) {
                        // ignore files with substrings in the
                        // no-preserve file
                        if (true == doPreserveFile(sFsPath)) {
                            // move it to the backup path
                            String sNewPath = preservePath + hdfsPath.toUri().getPath();
                            File newFile = new File(sNewPath);

                            // create directory structure for new file?
                            if (false == newFile.getParentFile().exists()) {
                                if (false == newFile.getParentFile().mkdirs()) {
                                    System.err
                                            .println("Failed to mkdirs " + newFile.getParentFile().toString());
                                    System.exit(1);
                                }
                            }

                            // rename existing file to new location
                            if (false == f.renameTo(newFile)) {
                                System.err.println(
                                        "Failed to renameTo " + f.toString() + " to " + newFile.toString());
                                System.exit(1);
                            }

                            System.out.println("preserved " + f.toString() + " into " + newFile.toString());
                        } else {
                            System.out.println("skipped preservation of " + f.toString());
                        }
                    }

                    // copy from hdfs to local filesystem
                    fs.copyToLocalFile(hdfsPath, fsPath);

                    // set the mtime to match hdfs file
                    f.setLastModified(stat.getModificationTime());

                    // compare checksums on both files
                    compareChecksums(fs, hdfsPath, sFsPath);
                }

                // don't print the progress after every file -- go
                // by at least 1% increments
                long nPercentDone = (long) (100 * tmpSize / m_nTotalBytes);
                if (nPercentDone > m_nLastPercentBytesDone) {
                    System.out.println("progress: copied " + prettyPrintBytes(tmpSize) + ", " + nPercentDone
                            + "% done" + ", tstamp=" + tmpDate);

                    m_nLastPercentBytesDone = nPercentDone;
                }

                if (m_nSleepSeconds > 0) {
                    try {
                        Thread.sleep(1000 * m_nSleepSeconds);
                    } catch (Exception e2) {
                        // ignore
                    }
                }
            } else {
                return tmpDate;
            }
        } catch (IOException e) {
            System.err.println("FATAL ERROR: Something wrong with the file");
            System.err.println(e);
            System.out.println(tmpDate);
            System.exit(1);

            return 0;
        }
    }

    return 0;
}