#ifndef ZMM_H
#define ZMM_H

/*
 * Kernel tuning parameters.
 * NOTE(review): the meanings are inferred from the names only (MULADD flag,
 * LAT = latency, MU/NU/KU = per-dimension unroll factors) -- confirm against
 * the generator that emitted this header before relying on them.
 */
#define ATL_mmMULADD
#define ATL_mmLAT 10
#define ATL_mmMU 5
#define ATL_mmNU 4
#define ATL_mmKU 48

/* Block dimensions; all three are the same value in this configuration. */
#define MB 48
#define NB 48
#define KB 48

/* Pre-multiplied block sizes: each is 48 * 48 = 2304. */
#define NBNB 2304
#define MBNB 2304
#define MBKB 2304
#define NBKB 2304

/* Doubled variants: NB2 = 2 * NB, NBNB2 = 2 * NBNB. */
#define NB2 96
#define NBNB2 4608

/*
 * Index helpers with the block size folded in as a literal:
 *   ATL_MulByNB(N_)    N_ * 48
 *   ATL_DivByNB(N_)    N_ / 48   (C integer division when N_ is an integer)
 *   ATL_MulByNBNB(N_)  N_ * 2304 (2304 = 48 * 48)
 * The argument is parenthesized, so compound expressions are safe to pass.
 */
#define ATL_MulByNB(N_) ((N_) * 48)
#define ATL_DivByNB(N_) ((N_) / 48)
#define ATL_MulByNBNB(N_) ((N_) * 2304)
22 | void ATL_zJIK48x48x48TN48x48x0_a1_b0(const int M, const int N, const int K, const TYPE alpha, const TYPE *A, const int lda, const TYPE *B, const int ldb, const TYPE beta, TYPE *C, const int ldc); |
---|
23 | void ATL_zJIK48x48x48TN48x48x0_a1_b1(const int M, const int N, const int K, const TYPE alpha, const TYPE *A, const int lda, const TYPE *B, const int ldb, const TYPE beta, TYPE *C, const int ldc); |
---|
24 | void ATL_zJIK48x48x48TN48x48x0_a1_bX(const int M, const int N, const int K, const TYPE alpha, const TYPE *A, const int lda, const TYPE *B, const int ldb, const TYPE beta, TYPE *C, const int ldc); |
---|
25 | |
---|
/*
 * NBmm_b1 -- expand to four kernel calls over the two halves of A_ and B_.
 *
 *   step  kernel  A operand    B operand    beta       C target
 *   1     _bX     A_           B_           ATL_rnone  C_
 *   2     _b1     A_           B_ + NBNB    ATL_rone   C_ + 1
 *   3     _bX     A_ + NBNB    B_ + NBNB    ATL_rnone  C_
 *   4     _b1     A_ + NBNB    B_           ATL_rone   C_ + 1
 *
 * be_ is part of the parameter list but is never expanded; the betas passed
 * to the kernels are the fixed constants shown above.
 *
 * NOTE(review): assuming each kernel computes C = A*B + beta*C, steps 1 and 3
 * net out to  C_   += A[NBNB..]*B[NBNB..] - A[0..]*B[0..]  and steps 2 and 4
 * to  C_+1 += A[0..]*B[NBNB..] + A[NBNB..]*B[0..],  i.e. a complex
 * multiply-accumulate built from real kernels -- confirm against the kernel.
 *
 * A_, B_ and C_ are expanded more than once, so arguments must be free of
 * side effects. The do/while(0) wrapper makes "NBmm_b1(...);" a single
 * statement, so it is safe as the unbraced body of an if/else.
 */
#define NBmm_b1(m_, n_, k_, al_, A_, lda_, B_, ldb_, be_, C_, ldc_) \
do { \
   ATL_zJIK48x48x48TN48x48x0_a1_bX(m_, n_, k_, al_, (A_), lda_, (B_), ldb_, ATL_rnone, C_, ldc_); \
   ATL_zJIK48x48x48TN48x48x0_a1_b1(m_, n_, k_, al_, (A_), lda_, (B_)+NBNB, ldb_, ATL_rone, (C_)+1, ldc_); \
   ATL_zJIK48x48x48TN48x48x0_a1_bX(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_)+NBNB, ldb_, ATL_rnone, C_, ldc_); \
   ATL_zJIK48x48x48TN48x48x0_a1_b1(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_), ldb_, ATL_rone, (C_)+1, ldc_); \
} while (0)
/*
 * NBmm_b0 -- expand to four kernel calls over the two halves of A_ and B_,
 * with the first write to each C target going through the _b0 kernel.
 *
 *   step  kernel  A operand    B operand    beta       C target
 *   1     _b0     A_           B_           ATL_rzero  C_
 *   2     _b0     A_           B_ + NBNB    ATL_rzero  C_ + 1
 *   3     _bX     A_ + NBNB    B_ + NBNB    ATL_rnone  C_
 *   4     _b1     A_ + NBNB    B_           ATL_rone   C_ + 1
 *
 * be_ is part of the parameter list but is never expanded; the betas passed
 * to the kernels are the fixed constants shown above.
 *
 * NOTE(review): assuming the _b0 kernel overwrites C with A*B and the others
 * compute C = A*B + beta*C, the net effect is
 *   C_   = A[NBNB..]*B[NBNB..] - A[0..]*B[0..]
 *   C_+1 = A[NBNB..]*B[0..]    + A[0..]*B[NBNB..]
 * with the previous contents of C discarded -- confirm against the kernel.
 *
 * A_, B_ and C_ are expanded more than once, so arguments must be free of
 * side effects. The do/while(0) wrapper makes "NBmm_b0(...);" a single
 * statement, so it is safe as the unbraced body of an if/else.
 */
#define NBmm_b0(m_, n_, k_, al_, A_, lda_, B_, ldb_, be_, C_, ldc_) \
do { \
   ATL_zJIK48x48x48TN48x48x0_a1_b0(m_, n_, k_, al_, (A_), lda_, (B_), ldb_, ATL_rzero, C_, ldc_); \
   ATL_zJIK48x48x48TN48x48x0_a1_b0(m_, n_, k_, al_, (A_), lda_, (B_)+NBNB, ldb_, ATL_rzero, (C_)+1, ldc_); \
   ATL_zJIK48x48x48TN48x48x0_a1_bX(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_)+NBNB, ldb_, ATL_rnone, C_, ldc_); \
   ATL_zJIK48x48x48TN48x48x0_a1_b1(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_), ldb_, ATL_rone, (C_)+1, ldc_); \
} while (0)
/*
 * NBmm_bX -- expand to four kernel calls over the two halves of A_ and B_,
 * applying the caller's be_ on the first write to each C target.
 *
 *   step  kernel  A operand    B operand    beta       C target
 *   1     _bX     A_           B_           -(be_)     C_
 *   2     _bX     A_           B_ + NBNB    be_        C_ + 1
 *   3     _bX     A_ + NBNB    B_ + NBNB    ATL_rnone  C_
 *   4     _b1     A_ + NBNB    B_           ATL_rone   C_ + 1
 *
 * Step 1 passes the negated beta and step 3 passes ATL_rnone on the same
 * target, so the two sign flips cancel for the original contents of C_.
 *
 * NOTE(review): assuming each kernel computes C = A*B + beta*C, the net
 * effect is
 *   C_   = be_*C_     + A[NBNB..]*B[NBNB..] - A[0..]*B[0..]
 *   C_+1 = be_*(C_+1) + A[NBNB..]*B[0..]    + A[0..]*B[NBNB..]
 * -- confirm against the kernel.
 *
 * A_, B_, C_ and be_ are expanded more than once, so arguments must be free
 * of side effects. The do/while(0) wrapper makes "NBmm_bX(...);" a single
 * statement, so it is safe as the unbraced body of an if/else.
 */
#define NBmm_bX(m_, n_, k_, al_, A_, lda_, B_, ldb_, be_, C_, ldc_) \
do { \
   ATL_zJIK48x48x48TN48x48x0_a1_bX(m_, n_, k_, al_, (A_), lda_, (B_), ldb_, -(be_), C_, ldc_); \
   ATL_zJIK48x48x48TN48x48x0_a1_bX(m_, n_, k_, al_, (A_), lda_, (B_)+NBNB, ldb_, be_, (C_)+1, ldc_); \
   ATL_zJIK48x48x48TN48x48x0_a1_bX(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_)+NBNB, ldb_, ATL_rnone, C_, ldc_); \
   ATL_zJIK48x48x48TN48x48x0_a1_b1(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_), ldb_, ATL_rone, (C_)+1, ldc_); \
} while (0)
/*
 * rNBmm_* -- plain name aliases for the ATL_dJIK... kernels, one per
 * _b1 / _b0 / _bX variant.
 * NOTE(review): the ATL_dJIK48x48x48TN48x48x0_a1_* functions are not declared
 * in the visible part of this header; a prototype has to be in scope from
 * elsewhere before any of these aliases is called.
 */
#define rNBmm_b1 ATL_dJIK48x48x48TN48x48x0_a1_b1
#define rNBmm_b0 ATL_dJIK48x48x48TN48x48x0_a1_b0
#define rNBmm_bX ATL_dJIK48x48x48TN48x48x0_a1_bX

#endif