List of usage examples for org.apache.hadoop.fs.FileSystem#getContentSummary
public ContentSummary getContentSummary(Path f) throws IOException
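Before the project-specific examples below, a minimal sketch of the call itself may help (the path "/tmp/example-dir" and the standalone class are illustrative only, not taken from any of the sources that follow): getContentSummary walks the subtree rooted at the given path and returns a ContentSummary carrying the aggregate byte length, file count, and directory count.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ContentSummaryExample {
    public static void main(String[] args) throws IOException {
        // Illustrative path; replace with a real HDFS file or directory.
        Path path = new Path("/tmp/example-dir");
        Configuration conf = new Configuration();
        FileSystem fs = path.getFileSystem(conf);
        // Aggregates size and counts over the whole subtree rooted at 'path'.
        ContentSummary summary = fs.getContentSummary(path);
        System.out.println("length=" + summary.getLength()
            + " files=" + summary.getFileCount()
            + " dirs=" + summary.getDirectoryCount());
    }
}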
From source file: org.apache.falcon.regression.core.util.HiveAssert.java
License: Apache License

/**
 * Assertion for equality of two tables.
 * @param cluster1 the ColoHelper of first cluster
 * @param table1 the first table (expected values)
 * @param cluster2 the ColoHelper of second cluster
 * @param table2 the second table (actual values)
 * @param softAssert object used for performing assertion
 * @return object used for performing assertion
 * @throws java.io.IOException
 */
public static SoftAssert assertTableEqual(ColoHelper cluster1, HCatTable table1, ColoHelper cluster2,
        HCatTable table2, SoftAssert softAssert, boolean notIgnoreTblTypeAndProps) throws IOException {
    FileSystem cluster1FS = cluster1.getClusterHelper().getHadoopFS();
    FileSystem cluster2FS = cluster2.getClusterHelper().getHadoopFS();
    final String table1FullName = table1.getDbName() + "." + table1.getTableName();
    final String table2FullName = table2.getDbName() + "." + table2.getTableName();
    LOGGER.info("Checking equality of table : " + table1FullName + " & " + table2FullName);
    //table metadata equality
    softAssert.assertEquals(table2.comment(), table1.comment(),
        "Table " + table1FullName + " has different comment from " + table2FullName);
    softAssert.assertEquals(table2.getBucketCols(), table1.getBucketCols(),
        "Table " + table1FullName + " has different bucket columns from " + table2FullName);
    assertColumnListEqual(table1.getCols(), table2.getCols(), softAssert);
    softAssert.assertEquals(table2.getNumBuckets(), table1.getNumBuckets(),
        "Table " + table1FullName + " has different number of buckets from " + table2FullName);
    assertColumnListEqual(table1.getPartCols(), table2.getPartCols(), softAssert);
    softAssert.assertEquals(table2.getSerdeParams(), table1.getSerdeParams(),
        "Table " + table1FullName + " has different serde params from " + table2FullName);
    softAssert.assertEquals(table2.getSortCols(), table1.getSortCols(),
        "Table " + table1FullName + " has different sort columns from " + table2FullName);
    softAssert.assertEquals(table2.getStorageHandler(), table1.getStorageHandler(),
        "Table " + table1FullName + " has different storage handler from " + table2FullName);
    if (notIgnoreTblTypeAndProps) {
        softAssert.assertEquals(table2.getTabletype(), table1.getTabletype(),
            "Table " + table1FullName + " has different Tabletype from " + table2FullName);
    }
    final Map<String, String> tbl1Props = table1.getTblProps();
    final Map<String, String> tbl2Props = table2.getTblProps();
    final String[] ignoreTblProps = { "transient_lastDdlTime", "repl.last.id", "last_modified_by",
        "last_modified_time", "COLUMN_STATS_ACCURATE", };
    for (String ignoreTblProp : ignoreTblProps) {
        tbl1Props.remove(ignoreTblProp);
        tbl2Props.remove(ignoreTblProp);
    }
    final String[] ignoreDefaultProps = { "numRows", "rawDataSize" };
    for (String ignoreProp : ignoreDefaultProps) {
        if ("-1".equals(tbl1Props.get(ignoreProp))) {
            tbl1Props.remove(ignoreProp);
        }
        if ("-1".equals(tbl2Props.get(ignoreProp))) {
            tbl2Props.remove(ignoreProp);
        }
    }
    if (notIgnoreTblTypeAndProps) {
        softAssert.assertEquals(tbl2Props, tbl1Props,
            "Table " + table1FullName + " has different TblProps from " + table2FullName);
    }
    LOGGER.info("Checking equality of table partitions");
    HCatClient hcatClient1 = cluster1.getClusterHelper().getHCatClient();
    HCatClient hcatClient2 = cluster2.getClusterHelper().getHCatClient();
    final List<HCatPartition> table1Partitions =
        hcatClient1.getPartitions(table1.getDbName(), table1.getTableName());
    final List<HCatPartition> table2Partitions =
        hcatClient2.getPartitions(table2.getDbName(), table2.getTableName());
    assertPartitionListEqual(table1Partitions, table2Partitions, softAssert);
    if (notIgnoreTblTypeAndProps) {
        softAssert.assertEquals(cluster2FS.getContentSummary(new Path(table2.getLocation())).getLength(),
            cluster1FS.getContentSummary(new Path(table1.getLocation())).getLength(),
            "Size of content for table1 and table2 are different");
    }
    //table content equality
    LOGGER.info("Checking equality of table contents");
    Statement jdbcStmt1 = null, jdbcStmt2 = null;
    try {
        final boolean execute1;
        final boolean execute2;
        jdbcStmt1 = cluster1.getClusterHelper().getHiveJdbcConnection().createStatement();
        jdbcStmt2 = cluster2.getClusterHelper().getHiveJdbcConnection().createStatement();
        execute1 = jdbcStmt1.execute("select * from " + table1FullName);
        execute2 = jdbcStmt2.execute("select * from " + table2FullName);
        softAssert.assertEquals(execute2, execute1,
            "Table " + table1FullName + " has different result of select * from " + table2FullName);
        if (execute1 && execute2) {
            final ResultSet resultSet1 = jdbcStmt1.getResultSet();
            final ResultSet resultSet2 = jdbcStmt2.getResultSet();
            final List<String> rows1 = HiveUtil.fetchRows(resultSet1);
            final List<String> rows2 = HiveUtil.fetchRows(resultSet2);
            softAssert.assertEquals(rows2, rows1,
                "Table " + table1FullName + " has different content from " + table2FullName);
        }
    } catch (SQLException e) {
        softAssert.fail("Comparison of content of table " + table1FullName + " with content of table "
            + table2FullName + " failed because of exception\n" + ExceptionUtils.getFullStackTrace(e));
    } finally {
        if (jdbcStmt1 != null) {
            try {
                jdbcStmt1.close();
            } catch (SQLException e) {
                LOGGER.warn("Closing of jdbcStmt1 failed: " + ExceptionUtils.getFullStackTrace(e));
            }
        }
        if (jdbcStmt2 != null) {
            try {
                jdbcStmt2.close();
            } catch (SQLException e) {
                LOGGER.warn("Closing of jdbcStmt2 failed: " + ExceptionUtils.getFullStackTrace(e));
            }
        }
    }
    return softAssert;
}
From source file: org.apache.falcon.regression.ExternalFSTest.java
License: Apache License

@Test(dataProvider = "getData")
public void replicateToExternalFS(final FileSystem externalFS, final String separator, final boolean withData)
        throws Exception {
    final String endpoint = externalFS.getUri().toString();
    Bundle.submitCluster(bundles[0], externalBundle);
    String startTime = TimeUtil.getTimeWrtSystemTime(0);
    String endTime = TimeUtil.addMinsToTime(startTime, 5);
    LOGGER.info("Time range between : " + startTime + " and " + endTime);
    String datePattern = StringUtils
        .join(new String[] { "${YEAR}", "${MONTH}", "${DAY}", "${HOUR}", "${MINUTE}" }, separator);
    //configure feed
    FeedMerlin feed = new FeedMerlin(bundles[0].getDataSets().get(0));
    String targetDataLocation = endpoint + testWasbTargetDir + datePattern;
    feed.setFilePath(sourcePath + '/' + datePattern);
    //erase all clusters from feed definition
    feed.clearFeedClusters();
    //set local cluster as source
    feed.addFeedCluster(new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0)))
        .withRetention("days(1000000)", ActionType.DELETE).withValidity(startTime, endTime)
        .withClusterType(ClusterType.SOURCE).build());
    //set externalFS cluster as target
    feed.addFeedCluster(
        new FeedMerlin.FeedClusterBuilder(Util.readEntityName(externalBundle.getClusters().get(0)))
            .withRetention("days(1000000)", ActionType.DELETE).withValidity(startTime, endTime)
            .withClusterType(ClusterType.TARGET).withDataLocation(targetDataLocation).build());
    //submit and schedule feed
    LOGGER.info("Feed : " + Util.prettyPrintXml(feed.toString()));
    AssertUtil.assertSucceeded(prism.getFeedHelper().submitAndSchedule(feed.toString()));
    datePattern = StringUtils.join(new String[] { "yyyy", "MM", "dd", "HH", "mm" }, separator);
    //upload necessary data
    DateTime date = new DateTime(startTime, DateTimeZone.UTC);
    DateTimeFormatter fmt = DateTimeFormat.forPattern(datePattern);
    String timePattern = fmt.print(date);
    HadoopUtil.recreateDir(clusterFS, sourcePath + '/' + timePattern);
    if (withData) {
        HadoopUtil.copyDataToFolder(clusterFS, sourcePath + '/' + timePattern, OSUtil.SINGLE_FILE);
    }
    Path srcPath = new Path(sourcePath + '/' + timePattern);
    Path dstPath = new Path(endpoint + testWasbTargetDir + '/' + timePattern);
    //check if coordinator exists
    TimeUtil.sleepSeconds(10);
    InstanceUtil.waitTillInstancesAreCreated(clusterOC, feed.toString(), 0);
    Assert.assertEquals(OozieUtil.checkIfFeedCoordExist(clusterOC, feed.getName(), "REPLICATION"), 1);
    //replication should start, wait while it ends
    InstanceUtil.waitTillInstanceReachState(clusterOC, Util.readEntityName(feed.toString()), 1,
        CoordinatorAction.Status.SUCCEEDED, EntityType.FEED);
    //check if data has been replicated correctly
    List<Path> cluster1ReplicatedData = HadoopUtil.getAllFilesRecursivelyHDFS(clusterFS, srcPath);
    List<Path> cluster2ReplicatedData = HadoopUtil.getAllFilesRecursivelyHDFS(externalFS, dstPath);
    AssertUtil.checkForListSizes(cluster1ReplicatedData, cluster2ReplicatedData);
    final ContentSummary srcSummary = clusterFS.getContentSummary(srcPath);
    final ContentSummary dstSummary = externalFS.getContentSummary(dstPath);
    Assert.assertEquals(dstSummary.getLength(), srcSummary.getLength());
}
From source file: org.apache.falcon.workflow.LateDataHandler.java
License: Apache License

private long usage(Path inPath, Configuration conf) throws IOException, FalconException {
    FileSystem fs = HadoopClientFactory.get().createProxiedFileSystem(inPath.toUri(), conf);
    FileStatus[] fileStatuses = fs.globStatus(inPath);
    if (fileStatuses == null || fileStatuses.length == 0) {
        return 0;
    }
    long totalSize = 0;
    for (FileStatus fileStatus : fileStatuses) {
        totalSize += fs.getContentSummary(fileStatus.getPath()).getLength();
    }
    return totalSize;
}
From source file: org.apache.ignite.igfs.HadoopIgfs20FileSystemAbstractSelfTest.java
License: Apache License

/**
 * Test expected failures for 'get content summary' operation.
 *
 * @param fs File system to test.
 * @param path Path to evaluate content summary for.
 */
private void assertContentSummaryFails(final FileSystem fs, final Path path) {
    GridTestUtils.assertThrows(log, new Callable<ContentSummary>() {
        @Override
        public ContentSummary call() throws Exception {
            return fs.getContentSummary(path);
        }
    }, FileNotFoundException.class, null);
}
From source file: org.apache.ivory.latedata.LateDataHandler.java
License: Apache License

public long usage(Path inPath, Configuration conf) throws IOException {
    FileSystem fs = inPath.getFileSystem(conf);
    FileStatus[] status = fs.globStatus(inPath);
    if (status == null || status.length == 0) {
        return 0;
    }
    long totalSize = 0;
    for (FileStatus statu : status) {
        totalSize += fs.getContentSummary(statu.getPath()).getLength();
    }
    return totalSize;
}
From source file: org.apache.kylin.source.hive.CreateFlatHiveTableStep.java
License: Apache License

private long getFileSize(String hdfsUrl) throws IOException {
    Configuration configuration = new Configuration();
    Path path = new Path(hdfsUrl);
    FileSystem fs = path.getFileSystem(configuration);
    ContentSummary contentSummary = fs.getContentSummary(path);
    long length = contentSummary.getLength();
    return length;
}
From source file: org.apache.lens.server.query.ResultFormatter.java
License: Apache License

/**
 * Format output.
 *
 * @param ctx the query context
 */
private void formatOutput(QueryContext ctx) {
    QueryHandle queryHandle = ctx.getQueryHandle();
    this.logSegregationContext.setLogSegragationAndQueryId(ctx.getQueryHandleString());
    try {
        if (!ctx.isPersistent()) {
            log.info("No result formatting required for query " + queryHandle);
            return;
        }
        if (ctx.isResultAvailableInDriver()) {
            log.info("Result formatter for {}", queryHandle);
            LensResultSet resultSet = queryService.getDriverResultset(queryHandle);
            boolean isPersistedInDriver = resultSet instanceof PersistentResultSet;
            if (isPersistedInDriver) {
                // skip result formatting if persisted size is huge
                Path persistedDirectory = new Path(ctx.getDriverResultPath());
                FileSystem fs = persistedDirectory.getFileSystem(ctx.getConf());
                long size = fs.getContentSummary(persistedDirectory).getLength();
                long threshold = ctx.getConf().getLong(LensConfConstants.RESULT_FORMAT_SIZE_THRESHOLD,
                    LensConfConstants.DEFAULT_RESULT_FORMAT_SIZE_THRESHOLD);
                log.info(" size :{} threshold:{}", size, threshold);
                if (size > threshold) {
                    log.warn("Persisted result size more than the threshold, size:{} and threshold:{}; Skipping formatter",
                        size, threshold);
                    queryService.setSuccessState(ctx);
                    return;
                }
            }
            // now do the formatting
            createAndSetFormatter(ctx, isPersistedInDriver);
            QueryOutputFormatter formatter = ctx.getQueryOutputFormatter();
            try {
                formatter.init(ctx, resultSet.getMetadata());
                if (ctx.getConf().getBoolean(LensConfConstants.QUERY_OUTPUT_WRITE_HEADER,
                        LensConfConstants.DEFAULT_OUTPUT_WRITE_HEADER)) {
                    formatter.writeHeader();
                }
                if (isPersistedInDriver) {
                    log.info("Result formatter for {} in persistent result", queryHandle);
                    Path persistedDirectory = new Path(ctx.getDriverResultPath());
                    // write all files from persistent directory
                    ((PersistedOutputFormatter) formatter).addRowsFromPersistedPath(persistedDirectory);
                } else {
                    log.info("Result formatter for {} in inmemory result", queryHandle);
                    InMemoryResultSet inmemory = (InMemoryResultSet) resultSet;
                    while (inmemory.hasNext()) {
                        ((InMemoryOutputFormatter) formatter).writeRow(inmemory.next());
                    }
                    inmemory.setFullyAccessed(true);
                }
                if (ctx.getConf().getBoolean(LensConfConstants.QUERY_OUTPUT_WRITE_FOOTER,
                        LensConfConstants.DEFAULT_OUTPUT_WRITE_FOOTER)) {
                    formatter.writeFooter();
                }
                formatter.commit();
            } finally {
                formatter.close();
            }
            queryService.setSuccessState(ctx);
            log.info("Result formatter has completed. Final path:{}", formatter.getFinalOutputPath());
        }
    } catch (Exception e) {
        MetricsService metricsService = LensServices.get().getService(MetricsService.NAME);
        metricsService.incrCounter(ResultFormatter.class, "formatting-errors");
        log.warn("Exception while formatting result for {}", queryHandle, e);
        try {
            // set output formatter to null so that server restart is faster in case this query is not purged.
            ctx.setQueryOutputFormatter(null);
            queryService.setFailedStatus(ctx, ERROR_MESSAGE, e);
        } catch (LensException e1) {
            log.error("Exception while setting failure for {}", queryHandle, e1);
        }
    }
}
From source file: org.apache.metamodel.util.HdfsResource.java
License: Apache License

@Override
public long getSize() {
    final FileSystem fs = getHadoopFileSystem();
    try {
        if (fs.isFile(getHadoopPath())) {
            return fs.getFileStatus(getHadoopPath()).getLen();
        } else {
            return fs.getContentSummary(getHadoopPath()).getLength();
        }
    } catch (Exception e) {
        throw wrapException(e);
    } finally {
        FileHelper.safeClose(fs);
    }
}
From source file: org.apache.orc.tools.JsonFileDump.java
License: Apache License

public static void printJsonMetaData(List<String> files, Configuration conf, List<Integer> rowIndexCols,
        boolean prettyPrint, boolean printTimeZone) throws JSONException, IOException {
    if (files.isEmpty()) {
        return;
    }
    JSONStringer writer = new JSONStringer();
    boolean multiFile = files.size() > 1;
    if (multiFile) {
        writer.array();
    } else {
        writer.object();
    }
    for (String filename : files) {
        try {
            if (multiFile) {
                writer.object();
            }
            writer.key("fileName").value(filename);
            Path path = new Path(filename);
            Reader reader = FileDump.getReader(path, conf, null);
            if (reader == null) {
                writer.key("status").value("FAILED");
                continue;
            }
            writer.key("fileVersion").value(reader.getFileVersion().getName());
            writer.key("writerVersion").value(reader.getWriterVersion());
            RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
            writer.key("numberOfRows").value(reader.getNumberOfRows());
            writer.key("compression").value(reader.getCompressionKind());
            if (reader.getCompressionKind() != CompressionKind.NONE) {
                writer.key("compressionBufferSize").value(reader.getCompressionSize());
            }
            writer.key("schemaString").value(reader.getSchema().toString());
            writer.key("schema").array();
            writeSchema(writer, reader.getTypes());
            writer.endArray();
            writer.key("stripeStatistics").array();
            List<StripeStatistics> stripeStatistics = reader.getStripeStatistics();
            for (int n = 0; n < stripeStatistics.size(); n++) {
                writer.object();
                writer.key("stripeNumber").value(n + 1);
                StripeStatistics ss = stripeStatistics.get(n);
                writer.key("columnStatistics").array();
                for (int i = 0; i < ss.getColumnStatistics().length; i++) {
                    writer.object();
                    writer.key("columnId").value(i);
                    writeColumnStatistics(writer, ss.getColumnStatistics()[i]);
                    writer.endObject();
                }
                writer.endArray();
                writer.endObject();
            }
            writer.endArray();
            ColumnStatistics[] stats = reader.getStatistics();
            int colCount = stats.length;
            if (rowIndexCols == null) {
                rowIndexCols = new ArrayList<>(colCount);
                for (int i = 0; i < colCount; ++i) {
                    rowIndexCols.add(i);
                }
            }
            writer.key("fileStatistics").array();
            for (int i = 0; i < stats.length; ++i) {
                writer.object();
                writer.key("columnId").value(i);
                writeColumnStatistics(writer, stats[i]);
                writer.endObject();
            }
            writer.endArray();
            writer.key("stripes").array();
            int stripeIx = -1;
            for (StripeInformation stripe : reader.getStripes()) {
                ++stripeIx;
                long stripeStart = stripe.getOffset();
                OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
                writer.object(); // start of stripe information
                writer.key("stripeNumber").value(stripeIx + 1);
                writer.key("stripeInformation");
                writeStripeInformation(writer, stripe);
                if (printTimeZone) {
                    writer.key("writerTimezone")
                        .value(footer.hasWriterTimezone() ? footer.getWriterTimezone() : FileDump.UNKNOWN);
                }
                long sectionStart = stripeStart;
                writer.key("streams").array();
                for (OrcProto.Stream section : footer.getStreamsList()) {
                    writer.object();
                    String kind = section.hasKind() ? section.getKind().name() : FileDump.UNKNOWN;
                    writer.key("columnId").value(section.getColumn());
                    writer.key("section").value(kind);
                    writer.key("startOffset").value(sectionStart);
                    writer.key("length").value(section.getLength());
                    sectionStart += section.getLength();
                    writer.endObject();
                }
                writer.endArray();
                writer.key("encodings").array();
                for (int i = 0; i < footer.getColumnsCount(); ++i) {
                    writer.object();
                    OrcProto.ColumnEncoding encoding = footer.getColumns(i);
                    writer.key("columnId").value(i);
                    writer.key("kind").value(encoding.getKind());
                    if (encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY
                            || encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
                        writer.key("dictionarySize").value(encoding.getDictionarySize());
                    }
                    writer.endObject();
                }
                writer.endArray();
                if (!rowIndexCols.isEmpty()) {
                    // include the columns that are specified, only if the columns are included, bloom filter
                    // will be read
                    boolean[] sargColumns = new boolean[colCount];
                    for (int colIdx : rowIndexCols) {
                        sargColumns[colIdx] = true;
                    }
                    OrcIndex indices = rows.readRowIndex(stripeIx, null, sargColumns);
                    writer.key("indexes").array();
                    for (int col : rowIndexCols) {
                        writer.object();
                        writer.key("columnId").value(col);
                        writeRowGroupIndexes(writer, col, indices.getRowGroupIndex());
                        writeBloomFilterIndexes(writer, col, indices.getBloomFilterIndex());
                        writer.endObject();
                    }
                    writer.endArray();
                }
                writer.endObject(); // end of stripe information
            }
            writer.endArray();
            FileSystem fs = path.getFileSystem(conf);
            long fileLen = fs.getContentSummary(path).getLength();
            long paddedBytes = FileDump.getTotalPaddingSize(reader);
            // empty ORC file is ~45 bytes. Assumption here is file length always >0
            double percentPadding = ((double) paddedBytes / (double) fileLen) * 100;
            writer.key("fileLength").value(fileLen);
            writer.key("paddingLength").value(paddedBytes);
            writer.key("paddingRatio").value(percentPadding);
            AcidStats acidStats = OrcAcidUtils.parseAcidStats(reader);
            if (acidStats != null) {
                writer.key("numInserts").value(acidStats.inserts);
                writer.key("numDeletes").value(acidStats.deletes);
                writer.key("numUpdates").value(acidStats.updates);
            }
            writer.key("status").value("OK");
            rows.close();
            writer.endObject();
        } catch (Exception e) {
            writer.key("status").value("FAILED");
            throw e;
        }
    }
    if (multiFile) {
        writer.endArray();
    }
    if (prettyPrint) {
        final String prettyJson;
        if (multiFile) {
            JSONArray jsonArray = new JSONArray(writer.toString());
            prettyJson = jsonArray.toString(2);
        } else {
            JSONObject jsonObject = new JSONObject(writer.toString());
            prettyJson = jsonObject.toString(2);
        }
        System.out.println(prettyJson);
    } else {
        System.out.println(writer.toString());
    }
}
From source file: org.apache.sysml.runtime.controlprogram.parfor.opt.OptimizerRuleBased.java
License: Apache License

/**
 * Increasing the partition replication factor is beneficial if partitions are
 * read multiple times (e.g., in nested loops) because partitioning (done once)
 * gets slightly slower but there is a higher probability for local access.
 *
 * NOTE: this rewrite requires 'set data partitioner' to be executed in order to
 * leverage the partitioning information in the plan tree.
 *
 * @param n internal representation of a plan alternative for program blocks and instructions
 * @param partitionedMatrices map of data partition formats
 * @param vars local variable map
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 */
protected void rewriteSetPartitionReplicationFactor(OptNode n,
        HashMap<String, PartitionFormat> partitionedMatrices, LocalVariableMap vars) throws DMLRuntimeException {
    boolean apply = false;
    double sizeReplicated = 0;
    int replication = ParForProgramBlock.WRITE_REPLICATION_FACTOR;
    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping()
        .getMappedProg(n.getID())[1];
    if (((n.getExecType() == ExecType.MR
            && n.getParam(ParamType.DATA_PARTITIONER).equals(PDataPartitioner.REMOTE_MR.name()))
        || (n.getExecType() == ExecType.SPARK
            && n.getParam(ParamType.DATA_PARTITIONER).equals(PDataPartitioner.REMOTE_SPARK.name())))
        && n.hasNestedParallelism(false) && n.hasNestedPartitionReads(false)) {
        apply = true;
        //account for problem and cluster constraints
        replication = (int) Math.min(_N, _rnk);
        //account for internal max constraint (note hadoop will warn if max > 10)
        replication = (int) Math.min(replication, MAX_REPLICATION_FACTOR_PARTITIONING);
        //account for remaining hdfs capacity
        try {
            FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
            long hdfsCapacityRemain = fs.getStatus().getRemaining();
            long sizeInputs = 0; //sum of all input sizes (w/o replication)
            for (String var : partitionedMatrices.keySet()) {
                MatrixObject mo = (MatrixObject) vars.get(var);
                Path fname = new Path(mo.getFileName());
                if (fs.exists(fname)) //non-existing (e.g., CP) -> small file
                    sizeInputs += fs.getContentSummary(fname).getLength();
            }
            replication = (int) Math.min(replication, Math.floor(0.9 * hdfsCapacityRemain / sizeInputs));
            //ensure at least replication 1
            replication = Math.max(replication, ParForProgramBlock.WRITE_REPLICATION_FACTOR);
            sizeReplicated = replication * sizeInputs;
        } catch (Exception ex) {
            throw new DMLRuntimeException("Failed to analyze remaining hdfs capacity.", ex);
        }
    }
    //modify the runtime plan
    if (apply)
        pfpb.setPartitionReplicationFactor(replication);
    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'set partition replication factor' - result=" + apply
        + ((apply) ? " (" + replication + ", " + toMB(sizeReplicated) + ")" : ""));
}