Skip to content

Commit

Permalink
[SYSTEMDS-3541] Exploratory workload-aware compression on intermediates
Browse files Browse the repository at this point in the history
Added a config option for aggressive compression and extended the compression workload analyzer to detect aggregation operations and binary matrix-vector operations when inputs are compressed as a single column group. Updated cost estimation for compression on already compressed inputs and removed scalars from compressible intermediate candidates. Added support for double compressed binary matrix-matrix operations and implemented both single-threaded and multithreaded compressed binary matrix-vector operations with single column group encoding. Removed the relaxed compression threshold and added a logging statement for potential improvements in compressed binary matrix-vector operations. Enabled unconditional sampling for binary matrix-vector operations in CLALibBinaryCellOp, expanded test coverage, and introduced a new compression algorithm test case for k-means with intermediate compression enabled.
I also extended the CLALibBinaryCellOp binary matrix-vector operation task (sparse and dense) to support both left and right operations.
  • Loading branch information
e-strauss committed Feb 19, 2025
1 parent 78b23cf commit cb7e6a9
Show file tree
Hide file tree
Showing 7 changed files with 425 additions and 93 deletions.
1 change: 1 addition & 0 deletions src/main/java/org/apache/sysds/conf/DMLConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ public class DMLConfig
public static final String PARALLEL_TOKENIZE = "sysds.parallel.tokenize";
public static final String PARALLEL_TOKENIZE_NUM_BLOCKS = "sysds.parallel.tokenize.numBlocks";
public static final String COMPRESSED_LINALG = "sysds.compressed.linalg";
public static final String COMPRESSED_LINALG_INTERMEDIATE = "sysds.compressed.linalg.intermediate";
public static final String COMPRESSED_LOSSY = "sysds.compressed.lossy";
public static final String COMPRESSED_VALID_COMPRESSIONS = "sysds.compressed.valid.compressions";
public static final String COMPRESSED_OVERLAPPING = "sysds.compressed.overlapping";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,12 @@ public static boolean satisfiesAggressiveCompressionCondition(Hop hop) {
satisfies |= HopRewriteUtils.isTernary(hop, OpOp3.CTABLE)
&& hop.getInput(0).getDataType().isMatrix()
&& hop.getInput(1).getDataType().isMatrix();
satisfies |= HopRewriteUtils.isData(hop, OpOpData.PERSISTENTREAD) && !hop.isScalar();
satisfies |= HopRewriteUtils.isData(hop, OpOpData.PERSISTENTREAD);
satisfies |= HopRewriteUtils.isUnary(hop, OpOp1.ROUND, OpOp1.FLOOR, OpOp1.NOT, OpOp1.CEIL);
satisfies |= HopRewriteUtils.isBinary(hop, OpOp2.EQUAL, OpOp2.NOTEQUAL, OpOp2.LESS,
OpOp2.LESSEQUAL, OpOp2.GREATER, OpOp2.GREATEREQUAL, OpOp2.AND, OpOp2.OR, OpOp2.MODULUS);
satisfies |= HopRewriteUtils.isTernary(hop, OpOp3.CTABLE);
satisfies &= !hop.isScalar();
}
if(LOG.isDebugEnabled() && satisfies)
LOG.debug("Operation Satisfies: " + hop);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,8 @@ private void classifyPhase() {
// final int nRows = mb.getNumRows();
final int nCols = mb.getNumColumns();
// Assume co-coding reduces the estimated cost by at most a factor of the square root of the number of columns.
final double scale = Math.sqrt(nCols);
final double scale = mb instanceof CompressedMatrixBlock &&
((CompressedMatrixBlock) mb).getColGroups().size() == 1 ? 1 : Math.sqrt(nCols);
final double threshold = _stats.estimatedCostCols / scale;

if(threshold < _stats.originalCost *
Expand Down
Loading

0 comments on commit cb7e6a9

Please sign in to comment.