Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Made framework changes to initialize specific cache block sizes for TRSM #570

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions build/bli_config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@
#define BLIS_DISABLE_SYSTEM
#endif

// AOCL_BLIS_ZEN is defined only for zen-family configurations.
// When defined, TRSM uses its own cache blocksizes instead of the common level-3 cache blocksizes.
#if @enable_aocl_zen@
#define AOCL_BLIS_ZEN
#endif

#if @enable_openmp@
#define BLIS_ENABLE_OPENMP
#endif
Expand Down
14 changes: 14 additions & 0 deletions config/zen/bli_cntx_init_zen.c
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,20 @@ void bli_cntx_init_zen( cntx_t* cntx )
cntx
);

// Update the context with the current architecture's register and cache
// blocksizes for level-3 TRSM execution.
bli_cntx_set_trsm_blkszs
(
5,
// level-3
BLIS_NC, &blkszs[ BLIS_NC ],
BLIS_KC, &blkszs[ BLIS_KC ],
BLIS_MC, &blkszs[ BLIS_MC ],
BLIS_NR, &blkszs[ BLIS_NR ],
BLIS_MR, &blkszs[ BLIS_MR ],
cntx
);

// -------------------------------------------------------------------------

// Initialize sup thresholds with architecture-appropriate values.
Expand Down
36 changes: 25 additions & 11 deletions config/zen2/bli_cntx_init_zen2.c
Original file line number Diff line number Diff line change
Expand Up @@ -174,17 +174,31 @@ void bli_cntx_init_zen2( cntx_t* cntx )

// -------------------------------------------------------------------------

// Initialize sup thresholds with architecture-appropriate values.
// s d c z
#if 1
bli_blksz_init_easy( &thresh[ BLIS_MT ], 500, 249, -1, -1 );
bli_blksz_init_easy( &thresh[ BLIS_NT ], 500, 249, -1, -1 );
bli_blksz_init_easy( &thresh[ BLIS_KT ], 500, 249, -1, -1 );
#else
bli_blksz_init_easy( &thresh[ BLIS_MT ], 100000, 100000, -1, -1 );
bli_blksz_init_easy( &thresh[ BLIS_NT ], 100000, 100000, -1, -1 );
bli_blksz_init_easy( &thresh[ BLIS_KT ], 100000, 100000, -1, -1 );
#endif
//Initialize TRSM blocksize objects with architecture-specific values.
//Using different cache block sizes for TRSM instead of common level-3 block sizes.
//Tuning is done for double-precision only.
// s d c z
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 144, 72 );
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 492, 256, 256 );
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 1600, 4080, 4080 );

// Update the context with the current architecture's register and cache
// blocksizes for level-3 TRSM problems.
bli_cntx_set_trsm_blkszs
(
5,
BLIS_NC, &blkszs[ BLIS_NC ],
BLIS_KC, &blkszs[ BLIS_KC ],
BLIS_MC, &blkszs[ BLIS_MC ],
BLIS_NR, &blkszs[ BLIS_NR ],
BLIS_MR, &blkszs[ BLIS_MR ],
cntx
);

// Initialize sup thresholds with architecture-appropriate values. s d c z
bli_blksz_init_easy( &thresh[ BLIS_MT ], 512, 256, -1, -1 );
bli_blksz_init_easy( &thresh[ BLIS_NT ], 200, 100, -1, -1 );
bli_blksz_init_easy( &thresh[ BLIS_KT ], 240, 120, -1, -1 );

// Initialize the context with the sup thresholds.
bli_cntx_set_l3_sup_thresh
Expand Down
13 changes: 13 additions & 0 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -3282,6 +3282,18 @@ main()
uconf=$(echo ${config_name} | tr '[:lower:]' '[:upper:]')
config_name_define="#define BLIS_FAMILY_${uconf}\n"

# Create an AOCL-specific #define.
# The macro is enabled only for zen-family configurations, i.e. when the
# configuration name contains the substring 'zen'. It lets TRSM use different
# cache block sizes instead of the common level-3 block sizes.
# NOTE: a shell assignment must not have whitespace around '='; otherwise the
# line is executed as a command and the variable is left unset.
case "${config_name}" in
	*zen*)
		enable_aocl_zen='yes'
		enable_aocl_zen_01=1
		;;
	*)
		enable_aocl_zen='no'
		enable_aocl_zen_01=0
		;;
esac

# Create a list of #defines, one for each configuration in config_list.
config_list_defines=""
for conf in ${config_list}; do
Expand Down Expand Up @@ -3395,6 +3407,7 @@ main()
| perl -pe "s/\@config_list_defines\@/${config_list_defines}/g" \
| perl -pe "s/\@kernel_list_defines\@/${kernel_list_defines}/g" \
| sed -e "s/@enable_system@/${enable_system_01}/g" \
| sed -e "s/\@enable_aocl_zen\@/${enable_aocl_zen_01}/g" \
| sed -e "s/@enable_openmp@/${enable_openmp_01}/g" \
| sed -e "s/@enable_pthreads@/${enable_pthreads_01}/g" \
| sed -e "s/@enable_jrir_slab@/${enable_jrir_slab_01}/g" \
Expand Down
6 changes: 3 additions & 3 deletions frame/3/bli_l3_blocksize.c
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
/*
/*

BLIS
An object-based framework for developing high-performance BLAS-like
libraries.

Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
Expand Down Expand Up @@ -34,7 +35,6 @@

#include "blis.h"


dim_t bli_l3_determine_kc
(
dir_t direct,
Expand Down Expand Up @@ -311,7 +311,7 @@ dim_t PASTEMAC0(opname) \
/* Extract the execution datatype and use it to query the corresponding
blocksize and blocksize maximum values from the blksz_t object. */ \
dt = bli_obj_exec_dt( a ); \
bsize = bli_cntx_get_blksz( bszid, cntx ); \
bsize = TRSM_BLKSZ_FUNC( bszid, cntx ); \
b_alg = bli_blksz_get_def( dt, bsize ); \
b_max = bli_blksz_get_max( dt, bsize ); \
\
Expand Down
10 changes: 8 additions & 2 deletions frame/3/trsm/bli_trsm_blk_var1.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
libraries.

Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Copyright (C) 2020, Advanced Micro Devices, Inc.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
Expand Down Expand Up @@ -80,9 +80,15 @@ void bli_trsm_blk_var1
{
obj_t a11_1, c1_1;

//For zen architectures, TRSM uses different MC, KC and NC blocking sizes than other Level-3 routines.
//Hence calling a different function to query TRSM-specific block sizes for zen family.
#ifdef AOCL_BLIS_ZEN
b_alg = bli_determine_blocksize_trsm( direct, i, my_end, &a11,
bli_cntl_bszid( cntl ), cntx );
#else
b_alg = bli_determine_blocksize( direct, i, my_end, &a11,
bli_cntl_bszid( cntl ), cntx );

#endif
// Acquire partitions for A1 and C1.
bli_acquire_mpart_mdim( direct, BLIS_SUBPART1,
i, b_alg, &a11, &a11_1 );
Expand Down
9 changes: 8 additions & 1 deletion frame/3/trsm/bli_trsm_blk_var2.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
libraries.

Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Copyright (C) 2020, Advanced Micro Devices, Inc.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
Expand Down Expand Up @@ -67,8 +67,15 @@ void bli_trsm_blk_var2
for ( dim_t i = my_start; i < my_end; i += b_alg )
{
// Determine the current algorithmic blocksize.
// For the zen family, TRSM uses different MC, KC and NC blocksizes than other Level-3 routines.
// Hence we call a different function to query the TRSM-specific blocksizes for the zen family.
#ifdef AOCL_BLIS_ZEN
b_alg = bli_determine_blocksize_trsm( direct, i, my_end, b,
bli_cntl_bszid( cntl ), cntx );
#else
b_alg = bli_determine_blocksize( direct, i, my_end, b,
bli_cntl_bszid( cntl ), cntx );
#endif

// Acquire partitions for B1 and C1.
bli_acquire_mpart_ndim( direct, BLIS_SUBPART1,
Expand Down
73 changes: 73 additions & 0 deletions frame/base/bli_blksz.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
libraries.

Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
Expand Down Expand Up @@ -301,6 +302,78 @@ dim_t bli_determine_blocksize_b
return b_use;
}

#ifdef AOCL_BLIS_ZEN

// Direction-dispatching front end for the TRSM-specific blocksize query.
// When direct is BLIS_FWD the forward variant is called; for any other value
// of direct the backward variant is called. The remaining arguments are
// passed through unchanged and the callee's result is returned as-is.
dim_t bli_determine_blocksize_trsm
     (
       dir_t   direct,
       dim_t   i,
       dim_t   dim,
       obj_t*  obj,
       bszid_t bszid,
       cntx_t* cntx
     )
{
	// Dispose of the non-forward case first; whatever remains is forward.
	if ( direct != BLIS_FWD )
	{
		return bli_determine_blocksize_trsm_b( i, dim, obj, bszid, cntx );
	}

	return bli_determine_blocksize_trsm_f( i, dim, obj, bszid, cntx );
}

// Forward variant of the TRSM-specific blocksize query.
// Reads the TRSM blksz_t object registered for bszid in cntx, extracts its
// default and maximum values for the execution datatype of obj, and returns
// whatever bli_determine_blocksize_f_sub() computes from i, dim and those
// two values.
dim_t bli_determine_blocksize_trsm_f
     (
       dim_t   i,
       dim_t   dim,
       obj_t*  obj,
       bszid_t bszid,
       cntx_t* cntx
     )
{
	// The execution datatype of obj selects which entries of the blksz_t
	// object are read below.
	const num_t exec_dt = bli_obj_exec_dt( obj );

	// Query the TRSM blocksize object (not the common level-3 one).
	blksz_t* trsm_blksz = bli_cntx_get_trsm_blksz( bszid, cntx );

	const dim_t blk_def = bli_blksz_get_def( exec_dt, trsm_blksz );
	const dim_t blk_max = bli_blksz_get_max( exec_dt, trsm_blksz );

	return bli_determine_blocksize_f_sub( i, dim, blk_def, blk_max );
}

// Backward variant of the TRSM-specific blocksize query.
// Reads the TRSM blksz_t object registered for bszid in cntx, extracts its
// default and maximum values for the execution datatype of obj, and returns
// whatever bli_determine_blocksize_b_sub() computes from i, dim and those
// two values.
dim_t bli_determine_blocksize_trsm_b
     (
       dim_t   i,
       dim_t   dim,
       obj_t*  obj,
       bszid_t bszid,
       cntx_t* cntx
     )
{
	// The execution datatype of obj selects which entries of the blksz_t
	// object are read below.
	const num_t exec_dt = bli_obj_exec_dt( obj );

	// Query the TRSM blocksize object (not the common level-3 one).
	blksz_t* trsm_blksz = bli_cntx_get_trsm_blksz( bszid, cntx );

	const dim_t blk_def = bli_blksz_get_def( exec_dt, trsm_blksz );
	const dim_t blk_max = bli_blksz_get_max( exec_dt, trsm_blksz );

	return bli_determine_blocksize_b_sub( i, dim, blk_def, blk_max );
}

#endif

dim_t bli_determine_blocksize_f_sub
(
dim_t i,
Expand Down
33 changes: 33 additions & 0 deletions frame/base/bli_blksz.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
libraries.

Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
Expand Down Expand Up @@ -278,6 +279,38 @@ dim_t bli_determine_blocksize_b
cntx_t* cntx
);

#ifdef AOCL_BLIS_ZEN

// TRSM-specific counterpart of the blocksize query, available only when
// AOCL_BLIS_ZEN is defined. Dispatches on direct: BLIS_FWD calls
// bli_determine_blocksize_trsm_f(), any other value calls
// bli_determine_blocksize_trsm_b(). The remaining arguments are forwarded
// unchanged and the selected variant's result is returned.
dim_t bli_determine_blocksize_trsm
(
dir_t direct,
dim_t i,
dim_t dim,
obj_t* obj,
bszid_t bszid,
cntx_t* cntx
);

// Forward variant: obtains the TRSM blksz_t for bszid from cntx via
// bli_cntx_get_trsm_blksz(), reads its default and maximum values for the
// execution datatype of obj, and returns the result of
// bli_determine_blocksize_f_sub() applied to i, dim and those two values.
dim_t bli_determine_blocksize_trsm_f
(
dim_t i,
dim_t dim,
obj_t* obj,
bszid_t bszid,
cntx_t* cntx
);

// Backward variant: obtains the TRSM blksz_t for bszid from cntx via
// bli_cntx_get_trsm_blksz(), reads its default and maximum values for the
// execution datatype of obj, and returns the result of
// bli_determine_blocksize_b_sub() applied to i, dim and those two values.
dim_t bli_determine_blocksize_trsm_b
(
dim_t i,
dim_t dim,
obj_t* obj,
bszid_t bszid,
cntx_t* cntx
);

#endif

dim_t bli_determine_blocksize_f_sub
(
dim_t i,
Expand Down
Loading