#ifndef LA_WRAPPER
#define LA_WRAPPER

#include <complex>
#define lapack_complex_float std::complex<float>
#define lapack_complex_double std::complex<double>

#include <mkl.h>
#include <sstream>
#include <stdexcept>

namespace blas_wrapper
{

//! @param n : number of elements to copy.
//! @param x : source vector x.
//! @param incx : storage stride between consecutive elements of vector x.
//! @param y : destination vector (y = x).
//! @param incy : storage stride between consecutive elements of vector y.

//! Single-precision real overload: passes all arguments straight through
//! to cblas_scopy.
static void copy(int n, const float *x, int incx, float *y, int incy)
{
    cblas_scopy(n,
                x, incx,
                y, incy);
}

//! Double-precision real overload: passes all arguments straight through
//! to cblas_dcopy.
static void copy(int n, const double *x, int incx, double *y, int incy)
{
    cblas_dcopy(n,
                x, incx,
                y, incy);
}

//! Double-precision complex overload: the std::complex<double> arrays are
//! reinterpreted as plain double pointers before being handed to cblas_zcopy.
static void copy(int n, const std::complex<double> *x, int incx, std::complex<double> *y, int incy)
{
    const double *src = reinterpret_cast<const double*>(x);
    double *dst = reinterpret_cast<double*>(y);

    cblas_zcopy(n, src, incx, dst, incy);
}

//! Single-precision complex overload: the std::complex<float> arrays are
//! reinterpreted as plain float pointers before being handed to cblas_ccopy.
static void copy(int n, const std::complex<float> *x, int incx, std::complex<float> *y, int incy)
{
    const float *src = reinterpret_cast<const float*>(x);
    float *dst = reinterpret_cast<float*>(y);

    cblas_ccopy(n, src, incx, dst, incy);
}


//! @param n : number of elements.
//! @param alpha : scalar factor.
//! @param x : vector x, scaled in place (x = alpha*x).
//! @param incx : storage stride between consecutive elements of vector x.

//! Single-precision real overload: passes all arguments straight through
//! to cblas_sscal.
static void scal(int n, float alpha, float *x, int incx)
{
    cblas_sscal(n,
                alpha,
                x, incx);
}

//! Double-precision real overload: passes all arguments straight through
//! to cblas_dscal.
static void scal(int n, double alpha, double *x, int incx)
{
    cblas_dscal(n,
                alpha,
                x, incx);
}

//! Single-precision complex overload: the scalar is passed to cblas_cscal
//! by address and the vector as a plain float pointer.
static void scal(int n, std::complex<float> alpha, std::complex<float> *x, int incx)
{
    const float *factor = reinterpret_cast<const float*>(&alpha);
    float *data = reinterpret_cast<float*>(x);

    cblas_cscal(n, factor, data, incx);
}

//! Double-precision complex overload: the scalar is passed to cblas_zscal
//! by address and the vector as a plain double pointer.
static void scal(int n, std::complex<double> alpha, std::complex<double> *x, int incx)
{
    const double *factor = reinterpret_cast<const double*>(&alpha);
    double *data = reinterpret_cast<double*>(x);

    cblas_zscal(n, factor, data, incx);
}

//! @param n : number of elements.
//! @param a : scalar factor applied to x.
//! @param x : source vector x.
//! @param incx : storage stride between consecutive elements of vector x.
//! @param y : source vector y, overwritten with the result (y = a*x + y).
//! @param incy : storage stride between consecutive elements of vector y.

//! Single-precision real overload: passes all arguments straight through
//! to cblas_saxpy.
static void axpy(const int n, const float a, const float *x, const int incx, float *y, const int incy)
{
    cblas_saxpy(n,
                a,
                x, incx,
                y, incy);
}

//! Double-precision real overload: passes all arguments straight through
//! to cblas_daxpy.
static void axpy(const int n, const double a, const double *x, const int incx, double *y, const int incy)
{
    cblas_daxpy(n,
                a,
                x, incx,
                y, incy);
}

//! Single-precision complex overload: the scalar is passed to cblas_caxpy
//! by address and both vectors as plain float pointers.
static void axpy(const int n, const std::complex<float> a, const std::complex<float> *x, const int incx, std::complex<float> *y, const int incy)
{
    const float *factor = reinterpret_cast<const float*>(&a);
    const float *src = reinterpret_cast<const float*>(x);
    float *dst = reinterpret_cast<float*>(y);

    cblas_caxpy(n, factor, src, incx, dst, incy);
}

//! Double-precision complex overload: the scalar is passed to cblas_zaxpy
//! by address and both vectors as plain double pointers.
static void axpy(const int n, const std::complex<double> a, const std::complex<double> *x, const int incx, std::complex<double> *y, const int incy)
{
    const double *factor = reinterpret_cast<const double*>(&a);
    const double *src = reinterpret_cast<const double*>(x);
    double *dst = reinterpret_cast<double*>(y);

    cblas_zaxpy(n, factor, src, incx, dst, incy);
}

//! @param trans : selects op(A). trans = 'N' or 'n': op(A) = A; trans = 'T' or 't': op(A) = trans(A); trans = 'C' or 'c': op(A) = adjoint(A), which is the same as trans(A) for real matrices.
//! @param m : number of rows of matrix A.
//! @param n : number of columns of matrix A.
//! @param alpha : scalar factor for op(A)*x.
//! @param A : matrix A.
//! @param lda : leading dimension of A.
//! @param x : vector of length at least (1+(n-1)*abs(incx)) if trans = 'N' or 'n', otherwise at least (1+(m-1)*abs(incx)).
//! @param incx : storage stride between consecutive elements of vector x.
//! @param beta : scalar factor for vector y.
//! @param y : vector of length at least (1+(m-1)*abs(incy)) if trans = 'N' or 'n', otherwise at least (1+(n-1)*abs(incy)); overwritten with alpha*op(A)*x + beta*y.
//! @param incy : storage stride between consecutive elements of vector y.

static void gemv (char trans, int m, int n, float alpha,
                  const float * const A, int lda,
                  const float * const x,  int incx, float beta,
                  float *y, int incy)
{
    CBLAS_TRANSPOSE tr = ( ( (trans == 't') || (trans == 'T') ) ? CblasTrans : CblasNoTrans );
    cblas_sgemv (CblasColMajor, tr, m, n, alpha, A, lda, x, incx, beta, y, incy);
}

static void gemv (char trans, int m, int n, double alpha,
                  const double * const A, int lda,
                  const double * const x,  int incx, double beta,
                  double *y, int incy)
{
    CBLAS_TRANSPOSE tr = ( ( (trans == 't') || (trans == 'T') ) ? CblasTrans : CblasNoTrans );
    cblas_dgemv (CblasColMajor, tr, m, n, alpha, A, lda, x, incx, beta, y, incy);
}

static void gemv (char trans, int m, int n, std::complex<float> & alpha,
                  const std::complex<float> * const A, int lda,
                  const std::complex<float> * const x,  int incx, std::complex<float> & beta,
                  std::complex<float> *y, int incy)
{
    CBLAS_TRANSPOSE tr = ( ( (trans == 't') || (trans == 'T') ) ? CblasTrans : CblasNoTrans );
    cblas_cgemv (CblasColMajor, tr, m, n, &alpha, A, lda, x, incx, &beta, y, incy);
}

static void gemv (char trans, int m, int n, std::complex<double> & alpha,
                  const std::complex<double> * const A, int lda,
                  const std::complex<double> * const x,  int incx, std::complex<double>  & beta,
                  std::complex<double> *y, int incy)
{
    CBLAS_TRANSPOSE tr = ( ( (trans == 't') || (trans == 'T') ) ? CblasTrans : CblasNoTrans );
    cblas_zgemv (CblasColMajor, tr, m, n, &alpha, A, lda, x, incx, &beta, y, incy);
}


//! @param transa : selects op(A). transa = 'N' or 'n': op(A) = A; transa = 'T' or 't': op(A) = trans(A); transa = 'C' or 'c': op(A) = adjoint(A).
//! @param transb : selects op(B). transb = 'N' or 'n': op(B) = B; transb = 'T' or 't': op(B) = trans(B); transb = 'C' or 'c': op(B) = adjoint(B).
//! @param m : number of rows of op(A) and of matrix C.
//! @param n : number of columns of op(B) and of matrix C.
//! @param k : number of columns of op(A) and number of rows of op(B).
//! @param alpha : scalar factor for op(A)*op(B).
//! @param A : matrix A.
//! @param lda : leading dimension of A.
//! @param B : matrix B.
//! @param ldb : leading dimension of B.
//! @param beta : scalar factor for matrix C.
//! @param C : matrix C, overwritten with alpha*op(A)*op(B) + beta*C.
//! @param ldc : leading dimension of C.
static void gemm(char transa, char transb, int m, int n, int k, float alpha,
                 const float * const A, int lda, const float * const B, int ldb,
                 float beta, float * C, int ldc)
{
    CBLAS_TRANSPOSE tr_a = ( ( (transa == 't') || (transa == 'T') ) ? CblasTrans : ( (transa == 'c') || (transa == 'C') ) ? CblasTrans : CblasNoTrans );
    CBLAS_TRANSPOSE tr_b = ( ( (transb == 't') || (transb == 'T') ) ? CblasTrans : ( (transb == 'c') || (transb == 'C') ) ? CblasTrans : CblasNoTrans );


    cblas_sgemm(CblasColMajor,
                tr_a, tr_b,
                m, n, k,
                alpha,
                A, lda,
                B, ldb,
                beta,
                C, ldc);
}


static void gemm(char transa, char transb, int m, int n, int k, double alpha,
                 const double * const A, int lda, const double * const B, int ldb,
                 double beta, double * C, int ldc)
{
    CBLAS_TRANSPOSE tr_a = ( ( (transa == 't') || (transa == 'T') ) ? CblasTrans : ( (transa == 'c') || (transa == 'C') ) ? CblasTrans : CblasNoTrans );
    CBLAS_TRANSPOSE tr_b = ( ( (transb == 't') || (transb == 'T') ) ? CblasTrans : ( (transb == 'c') || (transb == 'C') ) ? CblasTrans : CblasNoTrans );

    cblas_dgemm(CblasColMajor, tr_a, tr_b, m, n, k, alpha,
                A, lda, B, ldb,
                beta, C, ldc);
}

static void gemm(char transa, char transb, int m, int n, int k, const std::complex<float> alpha,
                 const std::complex<float> * const A, int lda, const std::complex<float> * const B, int ldb,
                 const std::complex<float> beta, std::complex<float> * C, int ldc)
{
    CBLAS_TRANSPOSE tr_a = ( ( (transa == 't') || (transa == 'T') ) ? CblasTrans : ( (transa == 'c') || (transa == 'C') ) ? CblasConjTrans : CblasNoTrans );
    CBLAS_TRANSPOSE tr_b = ( ( (transb == 't') || (transb == 'T') ) ? CblasTrans : ( (transb == 'c') || (transb == 'C') ) ? CblasConjTrans : CblasNoTrans );

    cblas_cgemm(CblasColMajor,
                tr_a, tr_b,
                m, n, k,
                reinterpret_cast<const float*>(&alpha),
                reinterpret_cast<const float*>(A), lda,
                reinterpret_cast<const float*>(B), ldb,
                reinterpret_cast<const float*>(&beta),
                reinterpret_cast<float*>(C), ldc);
}


static void gemm(char transa, char transb, int m, int n, int k, const std::complex<double> alpha,
                 const std::complex<double> * const A, int lda, const std::complex<double> * const B, int ldb,
                 const std::complex<double> beta, std::complex<double> * C, int ldc)
{
    CBLAS_TRANSPOSE tr_a = ( ( (transa == 't') || (transa == 'T') ) ? CblasTrans : ( (transa == 'c') || (transa == 'C') ) ? CblasConjTrans : CblasNoTrans );
    CBLAS_TRANSPOSE tr_b = ( ( (transb == 't') || (transb == 'T') ) ? CblasTrans : ( (transb == 'c') || (transb == 'C') ) ? CblasConjTrans : CblasNoTrans );

    cblas_zgemm(CblasColMajor,
                tr_a, tr_b,
                m, n, k,
                reinterpret_cast<const double*>(&alpha),
                reinterpret_cast<const double*>(A), lda,
                reinterpret_cast<const double*>(B), ldb,
                reinterpret_cast<const double*>(&beta),
                reinterpret_cast<double*>(C), ldc);
}

//! @param jobz : Must be 'N' or 'V'. If jobz = 'N', then only eigenvalues are computed. If jobz = 'V', then eigenvalues and eigenvectors are computed.
//! @param n : The order of the matrix A (n≥ 0). 
//! @param A : Array of size max(1, lda*n) holding the symmetric/Hermitian matrix A. The wrappers pass uplo = 'U', so the upper triangular part of A is referenced. On successful exit with jobz = 'V', array A contains the orthonormal eigenvectors of the matrix A.
//! @param lda : The leading dimension of the array A. Must be at least max(1, n).
//! @param w : Array, size at least max(1, n). On successful exit it contains the eigenvalues of the matrix A in ascending order.

static void xxev(char jobz, lapack_int n, float* A, lapack_int lda, float* w)
{
    int info = LAPACKE_ssyev(LAPACK_COL_MAJOR, jobz, 'U', n, A, lda, w);

    if (info > 0) {

        std::stringstream msg("");
        msg << "Error in syev: " << info << " elements are not converged" << std::endl;
        throw std::runtime_error(msg.str());

    } else if (info < 0) {

        std::stringstream msg("");
        msg << "Error in syev: Invalid input argument " << -info << std::endl;
        throw std::runtime_error(msg.str());

    }
}

static void xxev(char jobz, lapack_int n, double* A, lapack_int lda, double* w)
{
    int info = LAPACKE_dsyev(LAPACK_COL_MAJOR, jobz, 'U', n, A, lda, w);

    if (info > 0) {

        std::stringstream msg("");
        msg << "Error in syev: " << info << " elements are not converged" << std::endl;
        throw std::runtime_error(msg.str());

    } else if (info < 0) {

        std::stringstream msg("");
        msg << "Error in syev: Invalid input argument " << -info << std::endl;
        throw std::runtime_error(msg.str());

    }
}

static void xxev(char jobz, lapack_int n, std::complex<float>* A, lapack_int lda, float* w)
{
    int info = LAPACKE_cheev(LAPACK_COL_MAJOR, jobz, 'U', n, A, lda, w);

    if (info > 0) {

        std::stringstream msg("");
        msg << "Error in heev: " << info << " elements are not converged" << std::endl;
        throw std::runtime_error(msg.str());

    } else if (info < 0) {

        std::stringstream msg("");
        msg << "Error in heev: Invalid input argument " << -info << std::endl;
        throw std::runtime_error(msg.str());

    }
}

static void xxev(char jobz, lapack_int n, std::complex<double>* A, lapack_int lda, double* w)
{
    int info = LAPACKE_zheev(LAPACK_COL_MAJOR, jobz, 'U', n, A, lda, w);

    if (info > 0) {

        std::stringstream msg("");
        msg << "Error in heev: " << info << " elements are not converged" << std::endl;
        throw std::runtime_error(msg.str());

    } else if (info < 0) {

        std::stringstream msg("");
        msg << "Error in heev: Invalid input argument " << -info << std::endl;
        throw std::runtime_error(msg.str());

    }
}

//! @param jobz : Must be 'N' or 'V'. If jobz = 'N', then only eigenvalues are computed. If jobz = 'V', then eigenvalues and eigenvectors are computed.
//! @param range: Must be 'A' or 'V' or 'I'. If range = 'A', the routine computes all eigenvalues. If range = 'V', the routine computes eigenvalues w[i] in the half-open interval: vl < w[i]≤vu. If range = 'I', the routine computes eigenvalues with indices il to iu. For range = 'V'or 'I' and iu-il < n-1, sstebz/dstebz and sstein/dstein are called.
//! @param n : The order of the matrix A (n≥ 0). 
//! @param A : Array of size max(1, lda*n) holding the symmetric/Hermitian matrix A. The wrappers pass uplo = 'U', so the upper triangular part of A is referenced. The eigenvectors are returned in Z, not in A.
//! @param lda : The leading dimension of the array A. Must be at least max(1, n).
//! @param vl,vu : If range = 'V', the lower and upper bounds of the interval to be searched for eigenvalues. Constraint: vl < vu. If range = 'A' or 'I', vl and vu are not referenced.
//! @param il,iu : If range = 'I', the indices in ascending order of the smallest and largest eigenvalues to be returned. Constraint: 1 ≤ il ≤ iu ≤ n, if n > 0; il = 1 and iu = 0, if n = 0. If range = 'A' or 'V', il and iu are not referenced.
//! @param abstol : If jobz = 'V', the eigenvalues and eigenvectors output have residual norms bounded by abstol, and the dot products between different eigenvectors are bounded by abstol. If abstol < n *eps*||T||, then n *eps*||T|| is used instead, where eps is the machine precision, and ||T|| is the 1-norm of the matrix T. The eigenvalues are computed to an accuracy of eps*||T|| irrespective of abstol. If high relative accuracy is important, set abstol to ?lamch('S'). 
//! @param ldz : The leading dimension of the output array z. Constraints: ldz≥ 1 if jobz = 'N' and ldz≥ max(1, n) for column major layout and ldz≥ max(1, m) for row major layout if jobz = 'V'.
//! @param A : On exit, the lower triangle (if uplo = 'L') or the upper triangle (if uplo = 'U') of A, including the diagonal, is overwritten.
//! @param m : The total number of eigenvalues found, 0 ≤m≤n. If range = 'A', m = n, if range = 'I', m = iu-il+1, and if range = 'V' the exact value of m is not known in advance.
//! @param W : size at least max(1, n), contains the selected eigenvalues in ascending order, stored in w[0] to w[m - 1]
//! @param Z : z(size max(1, ldz*m) for column major layout and max(1, ldz*n) for row major layout). If jobz = 'V', then if info = 0, the first m columns of z contain the orthonormal eigenvectors of the matrix A corresponding to the selected eigenvalues, with the i-th column of z holding the eigenvector associated with w[i - 1]. If jobz = 'N', then z is not referenced.
//! @param isuppz : Array, size at least 2 *max(1, m). The support of the eigenvectors in z, i.e., the indices indicating the nonzero elements in z. The i-th eigenvector is nonzero only in elements isuppz[2i - 2] through isuppz[2i - 1]. Referenced only if eigenvectors are needed (jobz = 'V') and all eigenvalues are needed, that is, range = 'A' or range = 'I' and il = 1 and iu = n.
static void xxevr(char jobz,
                  char range,
                  lapack_int n,
                  float* A,
                  lapack_int lda,
                  float vl,
                  float vu,
                  lapack_int il,
                  lapack_int iu,
                  float abstol,
                  lapack_int* m,
                  float* W,
                  float* Z,
                  lapack_int ldz,
                  lapack_int* isuppz)
{
    int info = LAPACKE_ssyevr(LAPACK_COL_MAJOR, jobz, range, 'U', n, A, lda, vl, vu, il, iu, abstol, m, W, Z, ldz, isuppz);

    if (info > 0) {

        std::stringstream msg("");
        msg << "Error in syevr: " << info << " internal error" << std::endl;
        throw std::runtime_error(msg.str());

    } else if (info < 0) {

        std::stringstream msg("");
        msg << "Error in syev: Invalid input argument " << -info << std::endl;
        throw std::runtime_error(msg.str());

    }
}

static void xxevr(char jobz,
                  char range,
                  lapack_int n,
                  double* A,
                  lapack_int lda,
                  double vl,
                  double vu,
                  lapack_int il,
                  lapack_int iu,
                  double abstol,
                  lapack_int* m,
                  double* W,
                  double* Z,
                  lapack_int ldz,
                  lapack_int* isuppz)
{
    int info = LAPACKE_dsyevr(LAPACK_COL_MAJOR, jobz, range, 'U', n, A, lda, vl, vu, il, iu, abstol, m, W, Z, ldz, isuppz);

    if (info > 0) {

        std::stringstream msg("");
        msg << "Error in syevr: " << info << " internal error" << std::endl;
        throw std::runtime_error(msg.str());

    } else if (info < 0) {

        std::stringstream msg("");
        msg << "Error in syev: Invalid input argument " << -info << std::endl;
        throw std::runtime_error(msg.str());

    }
}

static void xxevr(char jobz,
                  char range,
                  lapack_int n,
                  std::complex<float>* A,
                  lapack_int lda,
                  float vl,
                  float vu,
                  lapack_int il,
                  lapack_int iu,
                  float abstol,
                  lapack_int* m,
                  float* W,
                  std::complex<float>* Z,
                  lapack_int ldz,
                  lapack_int* isuppz)
{
    int info = LAPACKE_cheevr(LAPACK_COL_MAJOR, jobz, range, 'U', n, A, lda, vl, vu, il, iu, abstol, m, W, Z, ldz, isuppz);

    if (info > 0) {

        std::stringstream msg("");
        msg << "Error in heevr: " << info << " internal error" << std::endl;
        throw std::runtime_error(msg.str());

    } else if (info < 0) {

        std::stringstream msg("");
        msg << "Error in heev: Invalid input argument " << -info << std::endl;
        throw std::runtime_error(msg.str());

    }
}

static void xxevr(char jobz,
                  char range,
                  lapack_int n,
                  std::complex<double>* A,
                  lapack_int lda,
                  double vl,
                  double vu,
                  lapack_int il,
                  lapack_int iu,
                  double abstol,
                  lapack_int* m,
                  double* W,
                  std::complex<double>* Z,
                  lapack_int ldz,
                  lapack_int* isuppz)
{
    int info = LAPACKE_zheevr(LAPACK_COL_MAJOR, jobz, range, 'U', n, A, lda, vl, vu, il, iu, abstol, m, W, Z, ldz, isuppz);

    if (info > 0) {

        std::stringstream msg("");
        msg << "Error in heevr: " << info << " internal error" << std::endl;
        throw std::runtime_error(msg.str());

    } else if (info < 0) {

        std::stringstream msg("");
        msg << "Error in heev: Invalid input argument " << -info << std::endl;
        throw std::runtime_error(msg.str());

    }
}

} // END NAMESPACE blas_wrapper

#endif // LA_WRAPPER

