[97] | 1 | #ifndef CMM_H |
---|
| 2 | #define CMM_H |
---|
| 3 | |
---|
| 4 | #define ATL_mmMULADD |
---|
| 5 | #define ATL_mmLAT 10 |
---|
| 6 | #define ATL_mmMU 5 |
---|
| 7 | #define ATL_mmNU 4 |
---|
| 8 | #define ATL_mmKU 80 |
---|
| 9 | #define MB 80 |
---|
| 10 | #define NB 80 |
---|
| 11 | #define KB 80 |
---|
| 12 | #define NBNB 6400 |
---|
| 13 | #define MBNB 6400 |
---|
| 14 | #define MBKB 6400 |
---|
| 15 | #define NBKB 6400 |
---|
| 16 | #define NB2 160 |
---|
| 17 | #define NBNB2 12800 |
---|
| 18 | |
---|
| 19 | #define ATL_MulByNB(N_) ((N_) * 80) |
---|
| 20 | #define ATL_DivByNB(N_) ((N_) / 80) |
---|
| 21 | #define ATL_MulByNBNB(N_) ((N_) * 6400) |
---|
| 22 | void ATL_cJIK80x80x80TN80x80x0_a1_b0(const int M, const int N, const int K, const TYPE alpha, const TYPE *A, const int lda, const TYPE *B, const int ldb, const TYPE beta, TYPE *C, const int ldc); |
---|
| 23 | void ATL_cJIK80x80x80TN80x80x0_a1_b1(const int M, const int N, const int K, const TYPE alpha, const TYPE *A, const int lda, const TYPE *B, const int ldb, const TYPE beta, TYPE *C, const int ldc); |
---|
| 24 | void ATL_cJIK80x80x80TN80x80x0_a1_bX(const int M, const int N, const int K, const TYPE alpha, const TYPE *A, const int lda, const TYPE *B, const int ldb, const TYPE beta, TYPE *C, const int ldc); |
---|
| 25 | |
---|
| 26 | #define NBmm_b1(m_, n_, k_, al_, A_, lda_, B_, ldb_, be_, C_, ldc_) \ |
---|
| 27 | { \ |
---|
| 28 | ATL_cJIK80x80x80TN80x80x0_a1_bX(m_, n_, k_, al_, (A_), lda_, (B_), ldb_, ATL_rnone, C_, ldc_); \ |
---|
| 29 | ATL_cJIK80x80x80TN80x80x0_a1_b1(m_, n_, k_, al_, (A_), lda_, (B_)+NBNB, ldb_, ATL_rone, (C_)+1, ldc_); \ |
---|
| 30 | ATL_cJIK80x80x80TN80x80x0_a1_bX(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_)+NBNB, ldb_, ATL_rnone, C_, ldc_); \ |
---|
| 31 | ATL_cJIK80x80x80TN80x80x0_a1_b1(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_), ldb_, ATL_rone, (C_)+1, ldc_); \ |
---|
| 32 | } |
---|
| 33 | #define NBmm_b0(m_, n_, k_, al_, A_, lda_, B_, ldb_, be_, C_, ldc_) \ |
---|
| 34 | { \ |
---|
| 35 | ATL_cJIK80x80x80TN80x80x0_a1_b0(m_, n_, k_, al_, (A_), lda_, (B_), ldb_, ATL_rzero, C_, ldc_); \ |
---|
| 36 | ATL_cJIK80x80x80TN80x80x0_a1_b0(m_, n_, k_, al_, (A_), lda_, (B_)+NBNB, ldb_, ATL_rzero, (C_)+1, ldc_); \ |
---|
| 37 | ATL_cJIK80x80x80TN80x80x0_a1_bX(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_)+NBNB, ldb_, ATL_rnone, C_, ldc_); \ |
---|
| 38 | ATL_cJIK80x80x80TN80x80x0_a1_b1(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_), ldb_, ATL_rone, (C_)+1, ldc_); \ |
---|
| 39 | } |
---|
| 40 | #define NBmm_bX(m_, n_, k_, al_, A_, lda_, B_, ldb_, be_, C_, ldc_) \ |
---|
| 41 | { \ |
---|
| 42 | ATL_cJIK80x80x80TN80x80x0_a1_bX(m_, n_, k_, al_, (A_), lda_, (B_), ldb_, -(be_), C_, ldc_); \ |
---|
| 43 | ATL_cJIK80x80x80TN80x80x0_a1_bX(m_, n_, k_, al_, (A_), lda_, (B_)+NBNB, ldb_, be_, (C_)+1, ldc_); \ |
---|
| 44 | ATL_cJIK80x80x80TN80x80x0_a1_bX(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_)+NBNB, ldb_, ATL_rnone, C_, ldc_); \ |
---|
| 45 | ATL_cJIK80x80x80TN80x80x0_a1_b1(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_), ldb_, ATL_rone, (C_)+1, ldc_); \ |
---|
| 46 | } |
---|
| 47 | #define rNBmm_b1 ATL_sJIK80x80x80TN80x80x0_a1_b1 |
---|
| 48 | #define rNBmm_b0 ATL_sJIK80x80x80TN80x80x0_a1_b0 |
---|
| 49 | #define rNBmm_bX ATL_sJIK80x80x80TN80x80x0_a1_bX |
---|
| 50 | |
---|
| 51 | #endif |
---|