/*******************************************************************************
* Copyright (C) 2023 Intel Corporation
*
* This software and the related documents are Intel copyrighted  materials,  and
* your use of  them is  governed by the  express license  under which  they were
* provided to you (License).  Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute,  disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents  are provided as  is,  with no express
* or implied  warranties,  other  than those  that are  expressly stated  in the
* License.
*******************************************************************************/

/*
*   Content : Intel(R) oneAPI Math Kernel Library (oneMKL) IE Sparse BLAS C
*             example for mkl_sparse_sp2m
*
*             List of oneMKL routines used in the example:
*             - CBLAS_DDOT
*             - MKL_FREE
*             - MKL_MALLOC
*             - MKL_SPARSE_DESTROY
*             - MKL_SPARSE_D_CREATE_CSR
*             - MKL_SPARSE_D_EXPORT_CSR
*             - MKL_SPARSE_D_MV
*             - MKL_SPARSE_OPTIMIZE
*             - MKL_SPARSE_SET_MV_HINT
*             - MKL_SPARSE_SP2M
*
********************************************************************************
*
* Consider the matrices A
*
*                 |  10     11      0     0     0   |
*                 |   0      0     12    13     0   |
*   A    =        |  15      0      0     0    14   |,
*                 |   0     16     17     0     0   |
*                 |   0      0      0    18    19   |
*
* and B
*
*                 |   5      0      0     0     1   |
*                 |   0      6      0     0     0   |
*   B    =        |   0      0      7     0     0   |.
*                 |   0      0      0     8     0   |
*                 |   0      0      0     0     9   |
*
* Both matrices A and B are stored in a zero-based compressed sparse row (CSR) 
* storage scheme with three arrays (see 'Sparse Matrix Storage Schemes' in the
* Intel oneMKL Developer Reference) as follows:
*
*           values_A = ( 10  11  12  13  14  15  16  17  18  19 )
*          columns_A = (  0   1   2   3   4   0   1   2   3   4 )
*         rowIndex_A = (  0       2       4       6       8      10 )
*
*           values_B = ( 5  1  6  7  8  9  )
*          columns_B = ( 0  4  1  2  3  4  )
*         rowIndex_B = ( 0     2  3  4  5  6 )
*
*
* The example performs the following operations:
*     1. Task 1 defines C_1 = A*B and computes two scalar products :
*
*        < C_1*x ,       y > = left,   using MKL_SPARSE_SP2M and CBLAS_DDOT.
*        < B*x   , (A^t)*y > = right,  using MKL_SPARSE_D_MV and CBLAS_DDOT.
*
*        These products should result in the same value. To display matrix C_1,
*        MKL_SPARSE_D_EXPORT_CSR is used and the result is printed.
*
*        The task uses the two-stage algorithm by calling MKL_SPARSE_SP2M
*        twice:
*        - first to allocate the row_start/row_end arrays (request =
*          SPARSE_STAGE_NNZ_COUNT)
*        - then to allocate and compute the column indices and values of non-
*          zero elements (request = SPARSE_STAGE_FINALIZE_MULT)
*
*     2. Task 2 defines C_2 = A*(B^t) and computes two scalar products :
*
*        <   C_2*x ,       y > = left,   using MKL_SPARSE_SP2M and CBLAS_DDOT.
*        < (B^t)*x , (A^t)*y > = right,  using MKL_SPARSE_D_MV and CBLAS_DDOT.
*
*        These products should result in the same value. To display matrix C_2,
*        MKL_SPARSE_D_EXPORT_CSR is used and the result is printed.
*
*        The task allocates and computes the entire output matrix in a single
*        call to MKL_SPARSE_SP2M (request = SPARSE_STAGE_FULL_MULT)
*
******************************************************************************/

#include <stdio.h>
#include <assert.h>
#include <math.h>
#include "mkl.h"

#ifdef MKL_ILP64
#define INT_PRINT_FORMAT "%lld"
#else
#define INT_PRINT_FORMAT "%d"
#endif

int main() {

#define M 5
#define NNZ_A 10
#define NNZ_B 6
#define ALIGN 128

/* To avoid constantly repeating the part of code that checks inbound SparseBLAS functions' status,
   use macro CALL_AND_CHECK_STATUS */
#define CALL_AND_CHECK_STATUS(function, error_message) do { \
          if(function != SPARSE_STATUS_SUCCESS)             \
          {                                                 \
          printf(error_message); fflush(0);                 \
          status = 1;                                       \
          goto memory_free;                                 \
          }                                                 \
} while(0)

    /* Declaration of values */
    double  *values_A = NULL, *values_B = NULL, *values_C = NULL;
    MKL_INT *columns_A = NULL, *columns_B = NULL, *columns_C = NULL;
    MKL_INT *rowIndex_A = NULL, *rowIndex_B = NULL, *pointerB_C = NULL, *pointerE_C = NULL;
    double  *rslt_mv = NULL, *rslt_mv_trans = NULL, *x = NULL, *y = NULL;
    double   left, right, residual_1, residual_2;
    MKL_INT  rows, cols, i, j, ii, status;
    sparse_index_base_t    indexing;
    struct matrix_descr    descr_type_gen;
    sparse_request_t       request;
    sparse_matrix_t        csrA = NULL, csrB = NULL, csrC_1 = NULL, csrC_2 = NULL;
    sparse_operation_t     opA, opB;

    /* Allocation of memory */
    values_A = (double *)mkl_malloc(sizeof(double) * NNZ_A, ALIGN);
    columns_A = (MKL_INT *)mkl_malloc(sizeof(MKL_INT) * NNZ_A, ALIGN);
    rowIndex_A = (MKL_INT *)mkl_malloc(sizeof(MKL_INT) * (M + 1), ALIGN);

    values_B = (double *)mkl_malloc(sizeof(double) * NNZ_B, ALIGN);
    columns_B = (MKL_INT *)mkl_malloc(sizeof(MKL_INT) * NNZ_B, ALIGN);
    rowIndex_B = (MKL_INT *)mkl_malloc(sizeof(MKL_INT) * (M + 1), ALIGN);

    x = (double *)mkl_malloc(sizeof(double) * M, ALIGN);
    y = (double *)mkl_malloc(sizeof(double) * M, ALIGN);
    rslt_mv = (double *)mkl_malloc(sizeof(double) * M, ALIGN);
    rslt_mv_trans = (double *)mkl_malloc(sizeof(double) * M, ALIGN);

    /* Set values of the variables*/
    descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
    status = 0;

    /* Matrix A */
    for( i = 0; i < NNZ_A; i++ )
          values_A[i] = ((double) i) + 10.0;
    for( i = 0; i < NNZ_A; i++ )
          columns_A[i] = i % 5;
    rowIndex_A[0] = 0;
    for( i = 1; i < M + 1; i++ )
          rowIndex_A[i] = rowIndex_A[i - 1] + 2;

    /* Matrix B */
    ii = 0;
    rowIndex_B[0] = 0;
    for( i = 0; i < M; i++ )
    {
        values_B[ii] = ((double) i) + 5.0;
        columns_B[ii] = i;
        ii++;
        rowIndex_B[i+1] = rowIndex_B[i] + 1;
        if (i == 0)
        {
            values_B[ii] = 1;
            columns_B[ii] = M-1;
            ii++;
            rowIndex_B[i+1]++;
        }
    }

    /* Vectors x and y */
    for( i = 0; i < M; i++ )
    {
          x[i] = 1.0; y[i] = 1.0;
    }
    /* Printing usable data */
    printf( "\n\n_______________Example program for MKL_SPARSE_SP2M_________________\n\n" );
    printf( "Input matrices stored in CSR format: \n" );
    printf( "\n MATRIX A:\nrow# : (value, column) (value, column)\n" );
    ii = 0;
    for( i = 0; i < M; i++ )
    {
        printf("row#" INT_PRINT_FORMAT ":", i + 1); fflush(0);
        for( j = rowIndex_A[i]; j < rowIndex_A[i+1]; j++ )
        {
            printf(" (%5.0f, " INT_PRINT_FORMAT ")", values_A[ii], columns_A[ii] ); fflush(0);
            ii++;
        }
        printf( "\n" );
    }
    ii = 0;
    printf( "\n MATRIX B:\nrow# : (value, column)\n" );
    for( i = 0; i < M; i++ )
    {
        printf("row#" INT_PRINT_FORMAT ":", i + 1); fflush(0);
        for( j = rowIndex_B[i]; j < rowIndex_B[i+1]; j++ )
        {
            printf(" (%5.0f, " INT_PRINT_FORMAT ")", values_B[ii], columns_B[ii] ); fflush(0);
            ii++;
        }
        printf( "\n" );
    }

    /* Prepare arrays, which are related to matrices.
       Create handles for matrices A and B stored in CSR format */
    CALL_AND_CHECK_STATUS(mkl_sparse_d_create_csr( &csrA, SPARSE_INDEX_BASE_ZERO, M, M, rowIndex_A, rowIndex_A+1, columns_A, values_A ),
                          "Error after MKL_SPARSE_D_CREATE_CSR, csrA \n");
    CALL_AND_CHECK_STATUS(mkl_sparse_d_create_csr( &csrB, SPARSE_INDEX_BASE_ZERO, M, M, rowIndex_B, rowIndex_B+1, columns_B, values_B ),
                          "Error after MKL_SPARSE_D_CREATE_CSR, csrB \n");


    /* Analytic Routines for MKL_SPARSE_D_MV.
       HINTS: provides estimate of number and type of upcoming matrix-vector operations
       OPTIMIZE: analyze sparse matrix; choose proper kernels and workload balancing strategy */
    CALL_AND_CHECK_STATUS(mkl_sparse_set_mv_hint( csrA, SPARSE_OPERATION_TRANSPOSE,     descr_type_gen, 1 ),
                          "Error after MKL_SPARSE_SET_MV_HINT, csrA \n");
    CALL_AND_CHECK_STATUS(mkl_sparse_set_mv_hint( csrB, SPARSE_OPERATION_NON_TRANSPOSE, descr_type_gen, 1 ),
                          "Error after MKL_SPARSE_SET_MV_HINT, csrB \n");
    CALL_AND_CHECK_STATUS(mkl_sparse_set_mv_hint( csrB, SPARSE_OPERATION_TRANSPOSE,     descr_type_gen, 1 ),
                          "Error after MKL_SPARSE_SET_MV_HINT, csrB \n");

    CALL_AND_CHECK_STATUS(mkl_sparse_optimize( csrA ),
                          "Error after MKL_SPARSE_OPTIMIZE, csrA \n");
    CALL_AND_CHECK_STATUS(mkl_sparse_optimize( csrB ),
                          "Error after MKL_SPARSE_OPTIMIZE, csrB \n");

/********************************************************************************
*   Task 1: 
********************************************************************************/
    printf( "\n-------------------------------------------------------\n" );    
    printf( " Task 1: \n" );
    printf( " Compute C_1 = A * B using MKL_SPARSE_SP2M \n" );

    /* Compute C_1 = A * B  */
    opA = SPARSE_OPERATION_NON_TRANSPOSE;
    opB = SPARSE_OPERATION_NON_TRANSPOSE;

    request = SPARSE_STAGE_NNZ_COUNT;
    CALL_AND_CHECK_STATUS(mkl_sparse_sp2m( opA, descr_type_gen, csrA, 
                                           opB, descr_type_gen, csrB, 
                                           request, &csrC_1 ),
                          "Error in Task 1 after MKL_SPARSE_SP2M, request SPARSE_STAGE_NNZ_COUNT \n");

    request = SPARSE_STAGE_FINALIZE_MULT;
    CALL_AND_CHECK_STATUS(mkl_sparse_sp2m( opA, descr_type_gen, csrA, 
                                           opB, descr_type_gen, csrB, 
                                           request, &csrC_1 ),
                          "Error in Task 1 after MKL_SPARSE_SP2M, request SPARSE_STAGE_FINALIZE_MULT \n");

    CALL_AND_CHECK_STATUS(mkl_sparse_set_mv_hint( csrC_1, SPARSE_OPERATION_NON_TRANSPOSE, descr_type_gen, 1 ),
                          "Error in Task 1 after MKL_SPARSE_SET_MV_HINT, csrC_1 \n");

    CALL_AND_CHECK_STATUS(mkl_sparse_optimize( csrC_1 ),
                          "Error in Task 1 after MKL_SPARSE_OPTIMIZE, csrC_1 \n");

    /* Execution Routines */
    /* Step 1:
       Need to compute the following variables:
           rslt_mv = C_1 * x
              left = <rslt_mv, y>              */
    CALL_AND_CHECK_STATUS(mkl_sparse_d_mv( SPARSE_OPERATION_NON_TRANSPOSE, 1.0, csrC_1, descr_type_gen, x, 0.0, rslt_mv ),
                          "Error in Task 1 after MKL_SPARSE_D_MV, csrC_1*x  \n");
    left = cblas_ddot( M, rslt_mv, 1, y, 1 );

    /* Step 2:
       Need to compute the following variables:
             rslt_mv =     B * x
       rslt_mv_trans = (A^t) * y
               right = <rslt_mv, rslt_mv_trans>  */

    CALL_AND_CHECK_STATUS(mkl_sparse_d_mv( SPARSE_OPERATION_NON_TRANSPOSE, 1.0, csrB, descr_type_gen, x, 0.0, rslt_mv ),
                          "Error in Task 1 after MKL_SPARSE_D_MV, csrB*x  \n");
    CALL_AND_CHECK_STATUS(mkl_sparse_d_mv( SPARSE_OPERATION_TRANSPOSE,     1.0, csrA, descr_type_gen, y, 0.0, rslt_mv_trans),
                          "Error in Task 1 after MKL_SPARSE_D_MV, csrA*y  \n");
    right = cblas_ddot( M, rslt_mv, 1, rslt_mv_trans, 1);

    /* Step 3:
       Compare values obtained for left and right  */
    residual_1 = fabs(left - right)/(fabs(left)+1);

    printf( "\n Check the resultant matrix C_1, using two scalar products.\n" );
    printf( "\n The difference between < C_1*x , y > and < B*x , (A^t)*y > = %g.\n", residual_1);
    CALL_AND_CHECK_STATUS(residual_1 < 1e-8 * sqrt(M) ? 0 : 1,
                          " Error in Task 1: MKL_SPARSE_SP2M did not at the correct solution.\n");

    printf( " Success: MKL_SPARSE_SP2M arrived at the correct solution.\n" );
    /* Printing output data */
    CALL_AND_CHECK_STATUS(mkl_sparse_d_export_csr( csrC_1, &indexing, &rows, &cols, &pointerB_C, &pointerE_C, &columns_C, &values_C ),
                          "Error in Task 1 after MKL_SPARSE_D_EXPORT_CSR  \n");

    printf( "\n RESULTANT MATRIX C_1:\nrow# : (value, column) (value, column)\n" );
    ii = 0;
    for( i = 0; i < M; i++ )
    {
        printf("row#" INT_PRINT_FORMAT ":", i + 1); fflush(0);
        for( j = pointerB_C[i]; j < pointerE_C[i]; j++ )
        {
            printf(" (%5.0f, " INT_PRINT_FORMAT ")", values_C[ii], columns_C[ii] ); fflush(0);
            ii++;
        }
        printf( "\n" );
    }

/********************************************************************************
*  Task 2: 
********************************************************************************/
    printf( "\n-------------------------------------------------------\n" );    
    printf( " Task 2: \n" );
    printf( " Compute C_2 = A * (B^t) using MKL_SPARSE_SP2M \n" );

    /* Compute C_2 = A * (B^t)  */
    opA = SPARSE_OPERATION_NON_TRANSPOSE;
    opB = SPARSE_OPERATION_TRANSPOSE;

    request = SPARSE_STAGE_FULL_MULT;
    CALL_AND_CHECK_STATUS(mkl_sparse_sp2m( opA, descr_type_gen, csrA, 
                                           opB, descr_type_gen, csrB, 
                                           request, &csrC_2 ),
                          "Error in Task 2 after MKL_SPARSE_SP2M, request SPARSE_STAGE_FULL_MULT \n");

    CALL_AND_CHECK_STATUS(mkl_sparse_set_mv_hint( csrC_2, SPARSE_OPERATION_NON_TRANSPOSE, descr_type_gen, 1 ),
                          "Error in Task 2 after MKL_SPARSE_SET_MV_HINT, csrC_2 \n");

    CALL_AND_CHECK_STATUS(mkl_sparse_optimize( csrC_2 ),
                          "Error in Task 2 after MKL_SPARSE_OPTIMIZE, csrC_2 \n");

    /* Execution Routines */
    /* Step 1:
       Need to compute the following variables:
           rslt_mv = C_2 * x
              left = <rslt_mv, y>              */
    CALL_AND_CHECK_STATUS(mkl_sparse_d_mv( SPARSE_OPERATION_NON_TRANSPOSE, 1.0, csrC_2, descr_type_gen, x, 0.0, rslt_mv ),
                          "Error in Task 2 after MKL_SPARSE_D_MV, csrC_2*x  \n");
    left = cblas_ddot( M, rslt_mv, 1, y, 1 );

    /* Step 2:
       Need to compute the following variables:
                 rslt_mv = (B^t) * x
           rslt_mv_trans = (A^t) * y
                   right = <rslt_mv, rslt_mv_trans>  */

    CALL_AND_CHECK_STATUS(mkl_sparse_d_mv( SPARSE_OPERATION_TRANSPOSE, 1.0, csrB, descr_type_gen, x, 0.0, rslt_mv ),
                          "Error in Task 2 after MKL_SPARSE_D_MV, csrB*x  \n");
    CALL_AND_CHECK_STATUS(mkl_sparse_d_mv( SPARSE_OPERATION_TRANSPOSE, 1.0, csrA, descr_type_gen, y, 0.0, rslt_mv_trans),
                          "Error in Task 2 after MKL_SPARSE_D_MV, csrA*y  \n");
    right = cblas_ddot( M, rslt_mv, 1, rslt_mv_trans, 1);

    /* Step 3:
       Compare values obtained for left and right  */
    residual_2 = fabs(left - right)/(fabs(left)+1);

    printf( "\n Check the resultant matrix C_2, using two scalar products.\n" );
    printf( "\n The difference between < C_2*x , y > and < (B^t)*x , (A^t)*y > = %g,\n", residual_2 );
    CALL_AND_CHECK_STATUS(residual_2 < 1e-8 * sqrt(M) ? 0 : 1,
                          " Error in Task 2: MKL_SPARSE_SP2M did not at the correct solution.\n");

    printf( " Success: MKL_SPARSE_SP2M arrived at the correct solution.\n" );
    /* Printing output data */
    CALL_AND_CHECK_STATUS(mkl_sparse_d_export_csr( csrC_2, &indexing, &rows, &cols, &pointerB_C, &pointerE_C, &columns_C, &values_C ),
                          "Error in Task 2 after MKL_SPARSE_D_EXPORT_CSR  \n");

    printf( "\n RESULTANT MATRIX C_2:\nrow# : (value, column) (value, column)\n" );
    ii = 0;
    for( i = 0; i < M; i++ )
    {
        printf("row#" INT_PRINT_FORMAT ":", i + 1); fflush(0);
        for( j = pointerB_C[i]; j < pointerE_C[i]; j++ )
        {
            printf(" (%5.0f, " INT_PRINT_FORMAT ")", values_C[ii], columns_C[ii] ); fflush(0);
            ii++;
        }
        printf( "\n" );
    }
    printf( "_____________________________________________________________________  \n" );

    /* Deallocate memory */
memory_free:
    /* Release matrix handle. Not necessary to deallocate arrays for which we don't allocate memory: values_C, columns_C, pointerB_C, and pointerE_C.
       These arrays will be deallocated together with csrC_1 structure. */
    if( mkl_sparse_destroy( csrC_1 ) != SPARSE_STATUS_SUCCESS)
    { printf(" Error after MKL_SPARSE_DESTROY, csrC_1 \n");fflush(0); status = 1; }

    if( mkl_sparse_destroy( csrC_2 ) != SPARSE_STATUS_SUCCESS)
    { printf(" Error after MKL_SPARSE_DESTROY, csrC_2 \n");fflush(0); status = 1; }

    /* Deallocate arrays for which we allocate memory ourselves. */
    mkl_free(rslt_mv_trans); mkl_free(rslt_mv); mkl_free(x); mkl_free(y);

    /* Release matrix handle and deallocate arrays for which we allocate memory ourselves. */
    if( mkl_sparse_destroy( csrA ) != SPARSE_STATUS_SUCCESS)
    { printf(" Error after MKL_SPARSE_DESTROY, csrA \n");fflush(0); status = 1; }
    mkl_free(values_A); mkl_free(columns_A); mkl_free(rowIndex_A);

    if( mkl_sparse_destroy( csrB ) != SPARSE_STATUS_SUCCESS)
    { printf(" Error after MKL_SPARSE_DESTROY, csrB \n");fflush(0); status = 1; }
    mkl_free(values_B); mkl_free(columns_B); mkl_free(rowIndex_B);

    return status;
}
