void
cublasCaxpy (int n, cuComplex alpha, const cuComplex *x, int incx,
cuComplex *y, int incy)
multiplies single-complex vector x by single-complex scalar alpha and adds
the result to single-complex vector y; that is, it overwrites single-complex
y with single-complex alpha * x + y.
void
cublasCgbmv (char trans, int m, int n, int kl, int ku, cuComplex alpha,
const cuComplex *A, int lda, const cuComplex *x, int incx, cuComplex beta,
cuComplex *y, int incy);
performs one of the matrix-vector operations
y = alpha*op(A)*x + beta*y, op(A)=A or op(A) = transpose(A)
alpha and beta are single precision complex scalars. x and y are single precision
complex vectors.
void cublasCgemm (char transa, char transb, int m, int n, int k,
cuComplex alpha, const cuComplex *A, int lda,
const cuComplex *B, int ldb, cuComplex beta,
cuComplex *C, int ldc)
performs one of the matrix-matrix operations
C = alpha * op(A) * op(B) + beta*C,
where op(X) is one of
op(X) = X or op(X) = transpose(X) or op(X) = conjg(transpose(X))
alpha and beta are single-complex scalars, and A, B and C are matrices
consisting of single-complex elements, with op(A) an m x k matrix, op(B)
a k x n matrix and C an m x n matrix.
cublasCgemv (char trans, int m, int n, cuComplex alpha, const cuComplex *A,
int lda, const cuComplex *x, int incx, cuComplex beta, cuComplex *y,
int incy)
performs one of the matrix-vector operations
y = alpha * op(A) * x + beta * y,
where op(A) is one of
op(A) = A or op(A) = transpose(A) or op(A) = conjugate(transpose(A))
where alpha and beta are single precision complex scalars, x and y are single
precision complex vectors, and A is an m x n matrix consisting of single precision
complex elements.
cublasCgerc (int m, int n, cuComplex alpha, const cuComplex *x, int incx,
const cuComplex *y, int incy, cuComplex *A, int lda)
performs the rank 1 operation
A = alpha * x * conjugate(transpose(y)) + A,
where alpha is a single precision complex scalar, x is an m element single
precision complex vector, y is an n element single precision complex vector, and A
is an m by n matrix consisting of single precision complex elements.
cublasCgeru (int m, int n, cuComplex alpha, const cuComplex *x, int incx,
const cuComplex *y, int incy, cuComplex *A, int lda)
performs the rank 1 operation
A = alpha * x * transpose(y) + A,
where alpha is a single precision complex scalar, x is an m element single
precision complex vector, y is an n element single precision complex vector, and A
is an m by n matrix consisting of single precision complex elements.
void
cublasChbmv (char uplo, int n, int k, cuComplex alpha, const cuComplex *A, int lda,
const cuComplex *x, int incx, cuComplex beta, cuComplex *y, int incy)
performs the matrix-vector operation
y := alpha*A*x + beta*y
alpha and beta are single precision complex scalars. x and y are single precision
complex vectors with n elements.
void
cublasChemm (char side, char uplo, int m, int n, cuComplex alpha,
const cuComplex *A, int lda, const cuComplex *B, int ldb,
cuComplex beta, cuComplex *C, int ldc);
performs one of the matrix-matrix operations
C = alpha * A * B + beta * C, or
C = alpha * B * A + beta * C,
where alpha and beta are single precision complex scalars, A is a hermitian matrix
consisting of single precision complex elements and stored in either lower or upper
storage mode, and B and C are m x n matrices consisting of single precision
complex elements.
void
cublasChemv (char uplo, int n, cuComplex alpha, const cuComplex *A, int lda,
const cuComplex *x, int incx, cuComplex beta, cuComplex *y, int incy)
performs the matrix-vector operation
y = alpha*A*x + beta*y
Alpha and beta are single precision complex scalars, and x and y are single
precision complex vectors, each with n elements.
void
cublasCher (char uplo, int n, float alpha, const cuComplex *x, int incx,
cuComplex *A, int lda)
performs the hermitian rank 1 operation
A = alpha * x * conjugate(transpose(x)) + A,
where alpha is a single precision real scalar, x is an n element single
precision complex vector and A is an n x n hermitian matrix consisting of
single precision complex elements.
void cublasCher2 (char uplo, int n, cuComplex alpha, const cuComplex *x, int incx,
const cuComplex *y, int incy, cuComplex *A, int lda)
performs the hermitian rank 2 operation
A = alpha*x*conjugate(transpose(y)) + conjugate(alpha)*y*conjugate(transpose(x)) + A,
where alpha is a single precision complex scalar, x and y are n element single
precision complex vectors and A is an n by n hermitian matrix consisting of single
precision complex elements.
void
cublasCher2k (char uplo, char trans, int n, int k, cuComplex alpha,
const cuComplex *A, int lda, const cuComplex *B, int ldb,
float beta, cuComplex *C, int ldc)
performs one of the hermitian rank 2k operations
C = alpha * A * conjugate(transpose(B))
+ conjugate(alpha) * B * conjugate(transpose(A))
+ beta * C ,
or
C = alpha * conjugate(transpose(A)) * B
+ conjugate(alpha) * conjugate(transpose(B)) * A
+ beta * C.
void
cublasCherk (char uplo, char trans, int n, int k, float alpha,
const cuComplex *A, int lda, float beta, cuComplex *C, int ldc)
performs one of the hermitian rank k operations
C = alpha * A * conjugate(transpose(A)) + beta * C, or
C = alpha * conjugate(transpose(A)) * A + beta * C.
void
cublasChpr (char uplo, int n, float alpha, const cuComplex *x, int incx,
cuComplex *AP)
performs the hermitian rank 1 operation
A = alpha * x * conjugate(transpose(x)) + A,
where alpha is a single precision real scalar and x is an n element single
precision complex vector.
void
cublasChpr2 (char uplo, int n, cuComplex alpha, const cuComplex *x, int incx,
const cuComplex *y, int incy, cuComplex *AP)
performs the hermitian rank 2 operation
A = alpha*x*conjugate(transpose(y)) + conjugate(alpha)*y*conjugate(transpose(x)) + A,
where alpha is a single precision complex scalar, and x and y are n element single
precision complex vectors.
void
cublasCrot (int n, cuComplex *x, int incx, cuComplex *y, int incy, float sc,
cuComplex cs)
multiplies a 2x2 matrix (        sc  cs ) with the 2xn matrix ( transpose(x) )
                        ( -conj(cs)  sc )                     ( transpose(y) )
The elements of x are in x[lx + i * incx], i = 0 ... n - 1, where lx = 1 if
incx >= 0, else lx = 1 + (1 - n) * incx, and similarly for y using ly and
incy.
void
cublasCsrot (int n, cuComplex *x, int incx, cuComplex *y, int incy, float c,
float s)
multiplies a 2x2 rotation matrix (  c  s ) with a 2xn matrix ( transpose(x) )
                                 ( -s  c )                   ( transpose(y) )
The elements of x are in x[lx + i * incx], i = 0 ... n - 1, where lx = 1 if
incx >= 0, else lx = 1 + (1 - n) * incx, and similarly for y using ly and
incy.
void
cublasCswap (int n, cuComplex *x, int incx, cuComplex *y, int incy)
interchanges the single-complex vector x with the single-complex vector y.
void
cublasCsymm (char side, char uplo, int m, int n, cuComplex alpha,
const cuComplex *A, int lda, const cuComplex *B, int ldb,
cuComplex beta, cuComplex *C, int ldc);
performs one of the matrix-matrix operations
C = alpha * A * B + beta * C, or
C = alpha * B * A + beta * C,
where alpha and beta are single precision complex scalars, A is a symmetric matrix
consisting of single precision complex elements and stored in either lower or upper
storage mode, and B and C are m x n matrices consisting of single precision
complex elements.
void
cublasCsyr2k (char uplo, char trans, int n, int k, cuComplex alpha,
const cuComplex *A, int lda, const cuComplex *B, int ldb,
cuComplex beta, cuComplex *C, int ldc)
performs one of the symmetric rank 2k operations
C = alpha * A * transpose(B) + alpha * B * transpose(A) + beta * C, or
C = alpha * transpose(A) * B + alpha * transpose(B) * A + beta * C.
void
cublasCsyrk (char uplo, char trans, int n, int k, cuComplex alpha,
const cuComplex *A, int lda, cuComplex beta, cuComplex *C, int ldc)
performs one of the symmetric rank k operations
C = alpha * A * transpose(A) + beta * C, or
C = alpha * transpose(A) * A + beta * C.
void
cublasCtbmv (char uplo, char trans, char diag, int n, int k, const cuComplex *A,
int lda, cuComplex *x, int incx)
performs one of the matrix-vector operations x = op(A) * x, where op(A) = A,
op(A) = transpose(A) or op(A) = conjugate(transpose(A)). x is an n-element
single precision complex vector, and A is an n x n, unit or non-unit, upper
or lower triangular band matrix composed of single precision complex elements.
void cublasCtbsv (char uplo, char trans, char diag, int n, int k,
const cuComplex *A, int lda, cuComplex *X, int incx)
solves one of the systems of equations op(A)*x = b, where op(A) is either
op(A) = A , op(A) = transpose(A) or op(A) = conjugate(transpose(A)).
void
cublasCtpmv (char uplo, char trans, char diag, int n, const cuComplex *AP,
cuComplex *x, int incx);
performs one of the matrix-vector operations x = op(A) * x, where op(A) = A,
op(A) = transpose(A) or op(A) = conjugate(transpose(A)) . x is an n element
single precision complex vector, and A is an n x n, unit or non-unit, upper
or lower triangular matrix composed of single precision complex elements.
void
cublasCtpsv (char uplo, char trans, char diag, int n, const cuComplex *AP,
cuComplex *X, int incx)
solves one of the systems of equations op(A)*x = b, where op(A) is either
op(A) = A , op(A) = transpose(A) or op(A) = conjugate(transpose(A)). b and
x are n element complex vectors, and A is an n x n unit or non-unit,
upper or lower triangular matrix.
void
cublasCtrmm (char side, char uplo, char transa, char diag, int m, int n,
cuComplex alpha, const cuComplex *A, int lda, cuComplex *B,
int ldb)
performs one of the matrix-matrix operations
B = alpha * op(A) * B, or B = alpha * B * op(A)
where alpha is a single-precision complex scalar, B is an m x n matrix composed
of single precision complex elements, and A is a unit or non-unit, upper or lower,
triangular matrix composed of single precision complex elements. op(A) is one of
op(A) = A , op(A) = transpose(A) or op(A) = conjugate(transpose(A))
Matrices A and B are stored in column major format, and lda and ldb are
the leading dimensions of the two-dimensional arrays that contain A and
B, respectively.
cublasCtrmv (char uplo, char trans, char diag, int n, const cuComplex *A,
int lda, cuComplex *x, int incx);
performs one of the matrix-vector operations x = op(A) * x,
where op(A) = A, or op(A) = transpose(A) or op(A) = conjugate(transpose(A)).
void
cublasCtrsm (char side, char uplo, char transa, char diag, int m, int n,
cuComplex alpha, const cuComplex *A, int lda,
cuComplex *B, int ldb)
solves one of the matrix equations
op(A) * X = alpha * B, or X * op(A) = alpha * B,
where alpha is a single precision complex scalar, and X and B are m x n matrices
that are composed of single precision complex elements.
void
cublasCtrsv (char uplo, char trans, char diag, int n, const cuComplex *A,
int lda, cuComplex *x, int incx)
solves a system of equations op(A) * x = b, where op(A) is either A,
transpose(A) or conjugate(transpose(A)). b and x are single precision
complex vectors consisting of n elements, and A is an n x n matrix
composed of a unit or non-unit, upper or lower triangular matrix.
double
cublasDasum (int n, const double *x, int incx)
computes the sum of the absolute values of the elements of double
precision vector x; that is, the result is the sum from i = 0 to n - 1 of
abs(x[1 + i * incx]).
void
cublasDaxpy (int n, double alpha, const double *x, int incx, double *y,
int incy)
multiplies double-precision vector x by double-precision scalar alpha
and adds the result to double-precision vector y; that is, it overwrites
double-precision y with double-precision alpha * x + y.
void
cublasDgbmv (char trans, int m, int n, int kl, int ku, double alpha,
const double *A, int lda, const double *x, int incx, double beta,
double *y, int incy);
performs one of the matrix-vector operations
y = alpha*op(A)*x + beta*y, op(A)=A or op(A) = transpose(A)
alpha and beta are double precision scalars. x and y are double precision
vectors.
void
cublasDgemm (char transa, char transb, int m, int n, int k, double alpha,
const double *A, int lda, const double *B, int ldb,
double beta, double *C, int ldc)
computes the product of matrix A and matrix B, multiplies the result
by scalar alpha, and adds the sum to the product of matrix C and
scalar beta.
cublasDgemv (char trans, int m, int n, double alpha, const double *A,
int lda, const double *x, int incx, double beta, double *y,
int incy)
performs one of the matrix-vector operations
y = alpha * op(A) * x + beta * y,
where op(A) is one of
op(A) = A or op(A) = transpose(A)
where alpha and beta are double precision scalars, x and y are double
precision vectors, and A is an m x n matrix consisting of double precision
elements.
cublasDger (int m, int n, double alpha, const double *x, int incx,
const double *y, int incy, double *A, int lda)
performs the rank 1 operation
A = alpha * x * transpose(y) + A,
where alpha is a double precision scalar, x is an m element double
precision vector, y is an n element double precision vector, and A
is an m by n matrix consisting of double precision elements.
void
cublasDrot (int n, double *x, int incx, double *y, int incy, double sc,
double ss)
multiplies a 2x2 matrix (  sc  ss ) with the 2xn matrix ( transpose(x) )
                        ( -ss  sc )                     ( transpose(y) )
The elements of x are in x[lx + i * incx], i = 0 ... n - 1, where lx = 1 if
incx >= 0, else lx = 1 + (1 - n) * incx, and similarly for y using ly and
incy.
void
cublasDsbmv (char uplo, int n, int k, double alpha, const double *A, int lda,
const double *x, int incx, double beta, double *y, int incy)
performs the matrix-vector operation
y := alpha*A*x + beta*y
alpha and beta are double precision scalars. x and y are double precision
vectors with n elements.
void
cublasDspmv (char uplo, int n, double alpha, const double *AP, const double *x,
int incx, double beta, double *y, int incy)
performs the matrix-vector operation
y = alpha * A * x + beta * y
Alpha and beta are double precision scalars, and x and y are double
precision vectors with n elements.
void
cublasDspr (char uplo, int n, double alpha, const double *x, int incx,
double *AP)
performs the symmetric rank 1 operation
A = alpha * x * transpose(x) + A,
where alpha is a double precision scalar and x is an n element double
precision vector.
void
cublasDspr2 (char uplo, int n, double alpha, const double *x, int incx,
const double *y, int incy, double *AP)
performs the symmetric rank 2 operation
A = alpha*x*transpose(y) + alpha*y*transpose(x) + A,
where alpha is a double precision scalar, and x and y are n element double
precision vectors.
void
cublasDsymm (char side, char uplo, int m, int n, double alpha,
const double *A, int lda, const double *B, int ldb,
double beta, double *C, int ldc);
performs one of the matrix-matrix operations
C = alpha * A * B + beta * C, or
C = alpha * B * A + beta * C,
where alpha and beta are double precision scalars, A is a symmetric matrix
consisting of double precision elements and stored in either lower or upper
storage mode, and B and C are m x n matrices consisting of double precision
elements.
void
cublasDsymv (char uplo, int n, double alpha, const double *A, int lda,
const double *x, int incx, double beta, double *y, int incy)
performs the matrix-vector operation
y = alpha*A*x + beta*y
Alpha and beta are double precision scalars, and x and y are double
precision vectors, each with n elements.
void
cublasDsyr (char uplo, int n, double alpha, const double *x, int incx,
double *A, int lda)
performs the symmetric rank 1 operation
A = alpha * x * transpose(x) + A,
where alpha is a double precision scalar, x is an n element double
precision vector and A is an n x n symmetric matrix consisting of
double precision elements.
void cublasDsyr2 (char uplo, int n, double alpha, const double *x, int incx,
const double *y, int incy, double *A, int lda)
performs the symmetric rank 2 operation
A = alpha*x*transpose(y) + alpha*y*transpose(x) + A,
where alpha is a double precision scalar, x and y are n element double
precision vectors and A is an n by n symmetric matrix consisting of double
precision elements.
void
cublasDsyr2k (char uplo, char trans, int n, int k, double alpha,
const double *A, int lda, const double *B, int ldb,
double beta, double *C, int ldc)
performs one of the symmetric rank 2k operations
C = alpha * A * transpose(B) + alpha * B * transpose(A) + beta * C, or
C = alpha * transpose(A) * B + alpha * transpose(B) * A + beta * C.
void
cublasDsyrk (char uplo, char trans, int n, int k, double alpha,
const double *A, int lda, double beta, double *C, int ldc)
performs one of the symmetric rank k operations
C = alpha * A * transpose(A) + beta * C, or
C = alpha * transpose(A) * A + beta * C.
void
cublasDtbmv (char uplo, char trans, char diag, int n, int k, const double *A,
int lda, double *x, int incx)
performs one of the matrix-vector operations x = op(A) * x, where op(A) = A,
or op(A) = transpose(A). x is an n-element double precision vector, and A is
an n x n, unit or non-unit, upper or lower triangular band matrix composed
of double precision elements.
void cublasDtbsv (char uplo, char trans, char diag, int n, int k,
const double *A, int lda, double *X, int incx)
solves one of the systems of equations op(A)*x = b, where op(A) is either
op(A) = A or op(A) = transpose(A). b and x are n element vectors, and A is
an n x n unit or non-unit, upper or lower triangular band matrix with k + 1
diagonals.
void
cublasDtpmv (char uplo, char trans, char diag, int n, const double *AP,
double *x, int incx);
performs one of the matrix-vector operations x = op(A) * x, where op(A) = A,
or op(A) = transpose(A). x is an n element double precision vector, and A
is an n x n, unit or non-unit, upper or lower triangular matrix composed
of double precision elements.
void
cublasDtpsv (char uplo, char trans, char diag, int n, const double *AP,
double *X, int incx)
solves one of the systems of equations op(A)*x = b, where op(A) is either
op(A) = A or op(A) = transpose(A). b and x are n element vectors, and A is
an n x n unit or non-unit, upper or lower triangular matrix.
void
cublasDtrmm (char side, char uplo, char transa, char diag, int m, int n,
double alpha, const double *A, int lda, double *B, int ldb)
performs one of the matrix-matrix operations
B = alpha * op(A) * B, or B = alpha * B * op(A)
where alpha is a double-precision scalar, B is an m x n matrix composed
of double precision elements, and A is a unit or non-unit, upper or lower,
triangular matrix composed of double precision elements. op(A) is one of
op(A) = A or op(A) = transpose(A)
Matrices A and B are stored in column major format, and lda and ldb are
the leading dimensions of the two-dimensional arrays that contain A and
B, respectively.
void
cublasDtrmv (char uplo, char trans, char diag, int n, const double *A,
int lda, double *x, int incx);
performs one of the matrix-vector operations x = op(A) * x, where op(A) = A,
or op(A) = transpose(A). x is an n-element double precision vector, and
A is an n x n, unit or non-unit, upper or lower, triangular matrix composed
of double precision elements.
void
cublasDtrsm (char side, char uplo, char transa, char diag, int m, int n,
double alpha, const double *A, int lda, double *B, int ldb)
solves one of the matrix equations
op(A) * X = alpha * B, or X * op(A) = alpha * B,
where alpha is a double precision scalar, and X and B are m x n matrices
that are composed of double precision elements.
void
cublasDtrsv (char uplo, char trans, char diag, int n, const double *A,
int lda, double *x, int incx)
solves a system of equations op(A) * x = b, where op(A) is either A or
transpose(A). b and x are double precision vectors consisting of n
elements, and A is an n x n matrix composed of a unit or non-unit, upper
or lower triangular matrix.
double
cublasDzasum (int n, const cuDoubleComplex *x, int incx)
takes the sum of the absolute values of a complex vector and returns a
double precision result.
cublasStatus_t
cublasGetMatrix (int rows, int cols, int elemSize, const void *A,
int lda, void *B, int ldb)
copies a tile of rows x cols elements from a matrix A in GPU memory
space to a matrix B in CPU memory space.
cublasStatus_t
cublasGetMatrixAsync (int rows, int cols, int elemSize, const void *A,
int lda, void *B, int ldb, cudaStream_t stream)
cublasGetMatrixAsync has the same functionality as cublasGetMatrix,
but the transfer is done asynchronously within the CUDA stream passed
as a parameter.
cublasStatus_t
cublasGetVector (int n, int elemSize, const void *x, int incx,
void *y, int incy)
copies n elements from a vector x in GPU memory space to a vector y
in CPU memory space.
cublasStatus
cublasGetVectorAsync( int n, int elemSize, const void *x, int incx,
void *y, int incy, cudaStream_t stream)
cublasGetVectorAsync has the same functionality as cublasGetVector,
but the transfer is done asynchronously within the CUDA stream passed
as a parameter.
int
cublasIcamax (int n, const cuComplex *x, int incx)
finds the smallest index of the element having maximum absolute value
in single-complex vector x; that is, the result is the first i, i = 0
to n - 1 that maximizes abs(real(x[1+i*incx]))+abs(imag(x[1 + i * incx])).
int
cublasIcamin (int n, const cuComplex *x, int incx)
finds the smallest index of the element having minimum absolute value
in single-complex vector x; that is, the result is the first i, i = 0
to n - 1 that minimizes abs(real(x[1+i*incx]))+abs(imag(x[1 + i * incx])).
int
cublasIdamax (int n, const double *x, int incx)
finds the smallest index of the maximum magnitude element of double-
precision vector x; that is, the result is the first i, i = 0 to n - 1,
that maximizes abs(x[1 + i * incx]).
int
cublasIdamin (int n, const double *x, int incx)
finds the smallest index of the minimum magnitude element of double-
precision vector x; that is, the result is the first i, i = 0 to n - 1,
that minimizes abs(x[1 + i * incx]).
int
cublasIsamax (int n, const float *x, int incx)
finds the smallest index of the maximum magnitude element of single
precision vector x; that is, the result is the first i, i = 0 to n - 1,
that maximizes abs(x[1 + i * incx]).
int
cublasIsamin (int n, const float *x, int incx)
finds the smallest index of the minimum magnitude element of single
precision vector x; that is, the result is the first i, i = 0 to n - 1,
that minimizes abs(x[1 + i * incx]).
int
cublasIzamax (int n, const cuDoubleComplex *x, int incx)
finds the smallest index of the element having maximum absolute value
in double-complex vector x; that is, the result is the first i, i = 0
to n - 1 that maximizes abs(real(x[1+i*incx]))+abs(imag(x[1 + i * incx])).
int
cublasIzamin (int n, const cuDoubleComplex *x, int incx)
finds the smallest index of the element having minimum absolute value
in double-complex vector x; that is, the result is the first i, i = 0
to n - 1 that minimizes abs(real(x[1+i*incx]))+abs(imag(x[1 + i * incx])).
float
cublasSasum (int n, const float *x, int incx)
computes the sum of the absolute values of the elements of single
precision vector x; that is, the result is the sum from i = 0 to n - 1 of
abs(x[1 + i * incx]).
void
cublasSaxpy (int n, float alpha, const float *x, int incx, float *y,
int incy)
multiplies single precision vector x by single precision scalar alpha
and adds the result to single precision vector y; that is, it overwrites
single precision y with single precision alpha * x + y.
float
cublasScasum (int n, const cuComplex *x, int incx)
takes the sum of the absolute values of a complex vector and returns a
single precision result.
cublasStatus_t
cublasSetMatrix (int rows, int cols, int elemSize, const void *A,
int lda, void *B, int ldb)
copies a tile of rows x cols elements from a matrix A in CPU memory
space to a matrix B in GPU memory space.
cublasStatus_t
cublasSetMatrixAsync (int rows, int cols, int elemSize, const void *A,
int lda, void *B, int ldb, cudaStream_t stream)
cublasSetMatrixAsync has the same functionality as cublasSetMatrix,
but the transfer is done asynchronously within the CUDA stream passed
as a parameter.
cublasStatus_t
cublasSetVector (int n, int elemSize, const void *x, int incx,
void *y, int incy)
copies n elements from a vector x in CPU memory space to a vector y
in GPU memory space.
cublasStatus
cublasSetVectorAsync ( int n, int elemSize, const void *x, int incx,
void *y, int incy, cudaStream_t stream );
cublasSetVectorAsync has the same functionality as cublasSetVector,
but the transfer is done asynchronously within the CUDA stream passed
as a parameter.
void
cublasSgbmv (char trans, int m, int n, int kl, int ku, float alpha,
const float *A, int lda, const float *x, int incx, float beta,
float *y, int incy)
performs one of the matrix-vector operations
y = alpha*op(A)*x + beta*y, op(A)=A or op(A) = transpose(A)
alpha and beta are single precision scalars. x and y are single precision
vectors.
void
cublasSgemm (char transa, char transb, int m, int n, int k, float alpha,
const float *A, int lda, const float *B, int ldb, float beta,
float *C, int ldc)
computes the product of matrix A and matrix B, multiplies the result
by a scalar alpha, and adds the sum to the product of matrix C and
scalar beta. sgemm() performs one of the matrix-matrix operations:
C = alpha * op(A) * op(B) + beta * C,
where op(X) is one of
op(X) = X or op(X) = transpose(X)
alpha and beta are single precision scalars, and A, B and C are
matrices consisting of single precision elements, with op(A) an m x k
matrix, op(B) a k x n matrix, and C an m x n matrix.
cublasSgemv (char trans, int m, int n, float alpha, const float *A, int lda,
const float *x, int incx, float beta, float *y, int incy)
performs one of the matrix-vector operations
y = alpha * op(A) * x + beta * y,
where op(A) is one of
op(A) = A or op(A) = transpose(A)
where alpha and beta are single precision scalars, x and y are single
precision vectors, and A is an m x n matrix consisting of single precision
elements.
cublasSger (int m, int n, float alpha, const float *x, int incx,
const float *y, int incy, float *A, int lda)
performs the rank 1 operation
A = alpha * x * transpose(y) + A,
where alpha is a single precision scalar, x is an m element single
precision vector, y is an n element single precision vector, and A
is an m by n matrix consisting of single precision elements.
void
cublasSrot (int n, float *x, int incx, float *y, int incy, float sc,
float ss)
multiplies a 2x2 matrix (  sc  ss ) with the 2xn matrix ( transpose(x) )
                        ( -ss  sc )                     ( transpose(y) )
The elements of x are in x[lx + i * incx], i = 0 ... n - 1, where lx = 1 if
incx >= 0, else lx = 1 + (1 - n) * incx, and similarly for y using ly and
incy.
void
cublasSsbmv (char uplo, int n, int k, float alpha, const float *A, int lda,
const float *x, int incx, float beta, float *y, int incy)
performs the matrix-vector operation
y := alpha*A*x + beta*y
alpha and beta are single precision scalars. x and y are single precision
vectors with n elements.
void
cublasSspmv (char uplo, int n, float alpha, const float *AP, const float *x,
int incx, float beta, float *y, int incy)
performs the matrix-vector operation
y = alpha * A * x + beta * y
Alpha and beta are single precision scalars, and x and y are single
precision vectors with n elements.
void
cublasSspr (char uplo, int n, float alpha, const float *x, int incx,
float *AP)
performs the symmetric rank 1 operation
A = alpha * x * transpose(x) + A,
where alpha is a single precision scalar and x is an n element single
precision vector.
void
cublasSspr2 (char uplo, int n, float alpha, const float *x, int incx,
const float *y, int incy, float *AP)
performs the symmetric rank 2 operation
A = alpha*x*transpose(y) + alpha*y*transpose(x) + A,
where alpha is a single precision scalar, and x and y are n element single
precision vectors.
void
cublasSsymm (char side, char uplo, int m, int n, float alpha,
const float *A, int lda, const float *B, int ldb,
float beta, float *C, int ldc);
performs one of the matrix-matrix operations
C = alpha * A * B + beta * C, or
C = alpha * B * A + beta * C,
where alpha and beta are single precision scalars, A is a symmetric matrix
consisting of single precision elements and stored in either lower or upper
storage mode, and B and C are m x n matrices consisting of single precision
elements.
void
cublasSsymv (char uplo, int n, float alpha, const float *A, int lda,
const float *x, int incx, float beta, float *y, int incy)
performs the matrix-vector operation
y = alpha*A*x + beta*y
Alpha and beta are single precision scalars, and x and y are single
precision vectors, each with n elements.
void
cublasSsyr (char uplo, int n, float alpha, const float *x, int incx,
float *A, int lda)
performs the symmetric rank 1 operation
A = alpha * x * transpose(x) + A,
where alpha is a single precision scalar, x is an n element single
precision vector and A is an n x n symmetric matrix consisting of
single precision elements.
void
cublasSsyr2 (char uplo, int n, float alpha, const float *x, int incx,
const float *y, int incy, float *A, int lda)
performs the symmetric rank 2 operation
A = alpha*x*transpose(y) + alpha*y*transpose(x) + A,
where alpha is a single precision scalar, x and y are n element single
precision vectors and A is an n by n symmetric matrix consisting of single
precision elements.
void
cublasSsyr2k (char uplo, char trans, int n, int k, float alpha,
const float *A, int lda, const float *B, int ldb,
float beta, float *C, int ldc)
performs one of the symmetric rank 2k operations
C = alpha * A * transpose(B) + alpha * B * transpose(A) + beta * C, or
C = alpha * transpose(A) * B + alpha * transpose(B) * A + beta * C.
void
cublasSsyrk (char uplo, char trans, int n, int k, float alpha,
const float *A, int lda, float beta, float *C, int ldc)
performs one of the symmetric rank k operations
C = alpha * A * transpose(A) + beta * C, or
C = alpha * transpose(A) * A + beta * C.
void
cublasStbmv (char uplo, char trans, char diag, int n, int k, const float *A,
int lda, float *x, int incx)
performs one of the matrix-vector operations x = op(A) * x, where op(A) = A
or op(A) = transpose(A). x is an n-element single precision vector, and A is
an n x n, unit or non-unit upper or lower triangular band matrix consisting
of single precision elements.
void cublasStbsv (char uplo, char trans, char diag, int n, int k,
const float *A, int lda, float *X, int incx)
solves one of the systems of equations op(A)*x = b, where op(A) is either
op(A) = A or op(A) = transpose(A). b and x are n-element vectors, and A is
an n x n unit or non-unit, upper or lower triangular band matrix with k + 1
diagonals.
void
cublasStpmv (char uplo, char trans, char diag, int n, const float *AP,
float *x, int incx);
performs one of the matrix-vector operations x = op(A) * x, where op(A) = A,
or op(A) = transpose(A). x is an n element single precision vector, and A
is an n x n, unit or non-unit, upper or lower triangular matrix composed
of single precision elements.
void
cublasStpsv (char uplo, char trans, char diag, int n, const float *AP,
float *X, int incx)
solves one of the systems of equations op(A)*x = b, where op(A) is either
op(A) = A or op(A) = transpose(A). b and x are n element vectors, and A is
an n x n unit or non-unit, upper or lower triangular matrix.
void
cublasStrmm (char side, char uplo, char transa, char diag, int m, int n,
float alpha, const float *A, int lda, float *B, int ldb)
performs one of the matrix-matrix operations
B = alpha * op(A) * B, or B = alpha * B * op(A)
where alpha is a single-precision scalar, B is an m x n matrix composed
of single precision elements, and A is a unit or non-unit, upper or lower,
triangular matrix composed of single precision elements. op(A) is one of
op(A) = A or op(A) = transpose(A)
Matrices A and B are stored in column major format, and lda and ldb are
the leading dimensions of the two-dimensional arrays that contain A and
B, respectively.
void
cublasStrmv (char uplo, char trans, char diag, int n, const float *A,
int lda, float *x, int incx);
performs one of the matrix-vector operations x = op(A) * x, where op(A) = A,
or op(A) = transpose(A). x is an n-element single precision vector, and
A is an n x n, unit or non-unit, upper or lower, triangular matrix composed
of single precision elements.
void
cublasStrsm (char side, char uplo, char transa, char diag, int m, int n,
float alpha, const float *A, int lda, float *B, int ldb)
solves one of the matrix equations
op(A) * X = alpha * B, or X * op(A) = alpha * B,
where alpha is a single precision scalar, and X and B are m x n matrices
that are composed of single precision elements.
void
cublasStrsv (char uplo, char trans, char diag, int n, const float *A,
int lda, float *x, int incx)
solves a system of equations op(A) * x = b, where op(A) is either A or
transpose(A). b and x are single precision vectors consisting of n
elements, and A is an n x n matrix composed of a unit or non-unit, upper
or lower triangular matrix.
void
cublasZaxpy (int n, cuDoubleComplex alpha, const cuDoubleComplex *x, int incx,
cuDoubleComplex *y, int incy)
multiplies double-complex vector x by double-complex scalar alpha and adds
the result to double-complex vector y; that is, it overwrites double-complex
y with double-complex alpha * x + y.
void
cublasZcopy (int n, const cuDoubleComplex *x, int incx, cuDoubleComplex *y, int incy)
copies the double-complex vector x to the double-complex vector y.
cuDoubleComplex
cublasZdotc (int n, const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, int incy)
computes the dot product of two double-precision complex vectors.
cuDoubleComplex
cublasZdotu (int n, const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, int incy)
computes the dot product of two double-complex vectors.
void
cublasZdrot (int n, cuDoubleComplex *x, int incx, cuDoubleComplex *y, int incy, double c,
double s)
multiplies the 2x2 matrix [ c s ; -s c ] with the 2xn matrix
[ transpose(x) ; transpose(y) ], where ';' separates matrix rows.
The elements of x are in x[lx + i * incx], i = 0 ... n - 1, where lx = 1 if
incx >= 0, else lx = 1 + (1 - n) * incx, and similarly for y using ly and
incy.
void
cublasZgbmv (char trans, int m, int n, int kl, int ku, cuDoubleComplex alpha,
const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, cuDoubleComplex beta,
cuDoubleComplex *y, int incy);
performs one of the matrix-vector operations
y = alpha*op(A)*x + beta*y, op(A)=A or op(A) = transpose(A)
alpha and beta are double precision complex scalars. x and y are double precision
complex vectors.
void cublasZgemm (char transa, char transb, int m, int n, int k,
cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
const cuDoubleComplex *B, int ldb, cuDoubleComplex beta,
cuDoubleComplex *C, int ldc)
zgemm performs one of the matrix-matrix operations
C = alpha * op(A) * op(B) + beta*C,
where op(X) is one of
op(X) = X or op(X) = transpose(X) or op(X) = conjg(transpose(X))
alpha and beta are double-complex scalars, and A, B and C are matrices
consisting of double-complex elements, with op(A) an m x k matrix, op(B)
a k x n matrix and C an m x n matrix.
cublasZgemv (char trans, int m, int n, cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
const cuDoubleComplex *x, int incx, cuDoubleComplex beta, cuDoubleComplex *y, int incy)
performs one of the matrix-vector operations
y = alpha * op(A) * x + beta * y,
where op(A) is one of
op(A) = A or op(A) = transpose(A)
where alpha and beta are double precision complex scalars, x and y are double
precision complex vectors, and A is an m x n matrix consisting of double
precision complex elements.
cublasZgerc (int m, int n, cuDoubleComplex alpha, const cuDoubleComplex *x, int incx,
const cuDoubleComplex *y, int incy, cuDoubleComplex *A, int lda)
performs the rank 1 operation
A = alpha * x * conjugate(transpose(y)) + A,
where alpha is a double precision complex scalar, x is an m element double
precision complex vector, y is an n element double precision complex vector, and A
is an m by n matrix consisting of double precision complex elements.
cublasZgeru (int m, int n, cuDoubleComplex alpha, const cuDoubleComplex *x, int incx,
const cuDoubleComplex *y, int incy, cuDoubleComplex *A, int lda)
performs the rank 1 operation
A = alpha * x * transpose(y) + A,
where alpha is a double precision complex scalar, x is an m element double
precision complex vector, y is an n element double precision complex vector, and A
is an m by n matrix consisting of double precision complex elements.
void
cublasZhbmv (char uplo, int n, int k, cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
const cuDoubleComplex *x, int incx, cuDoubleComplex beta, cuDoubleComplex *y, int incy)
performs the matrix-vector operation
y := alpha*A*x + beta*y
alpha and beta are double precision complex scalars. x and y are double precision
complex vectors with n elements.
void
cublasZhemm (char side, char uplo, int m, int n, cuDoubleComplex alpha,
const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
cuDoubleComplex beta, cuDoubleComplex *C, int ldc);
performs one of the matrix-matrix operations
C = alpha * A * B + beta * C, or
C = alpha * B * A + beta * C,
where alpha and beta are double precision complex scalars, A is a hermitian matrix
consisting of double precision complex elements and stored in either lower or upper
storage mode, and B and C are m x n matrices consisting of double precision
complex elements.
void
cublasZhemv (char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
const cuDoubleComplex *x, int incx, cuDoubleComplex beta, cuDoubleComplex *y, int incy)
performs the matrix-vector operation
y = alpha*A*x + beta*y
Alpha and beta are double precision complex scalars, and x and y are double
precision complex vectors, each with n elements.
void
cublasZher (char uplo, int n, double alpha, const cuDoubleComplex *x, int incx,
cuDoubleComplex *A, int lda)
performs the hermitian rank 1 operation
A = alpha * x * conjugate(transpose(x)) + A,
where alpha is a double precision real scalar, x is an n element double
precision complex vector and A is an n x n hermitian matrix consisting of
double precision complex elements.
void cublasZher2 (char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex *x, int incx,
const cuDoubleComplex *y, int incy, cuDoubleComplex *A, int lda)
performs the hermitian rank 2 operation
A = alpha*x*conjugate(transpose(y)) + conjugate(alpha)*y*conjugate(transpose(x)) + A,
where alpha is a double precision complex scalar, x and y are n element double
precision complex vectors and A is an n by n hermitian matrix consisting of double
precision complex elements.
void
cublasZher2k (char uplo, char trans, int n, int k, cuDoubleComplex alpha,
const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
double beta, cuDoubleComplex *C, int ldc)
performs one of the hermitian rank 2k operations
C = alpha * A * conjugate(transpose(B))
+ conjugate(alpha) * B * conjugate(transpose(A))
+ beta * C ,
or
C = alpha * conjugate(transpose(A)) * B
+ conjugate(alpha) * conjugate(transpose(B)) * A
+ beta * C.
void
cublasZherk (char uplo, char trans, int n, int k, double alpha,
const cuDoubleComplex *A, int lda, double beta, cuDoubleComplex *C, int ldc)
performs one of the hermitian rank k operations
C = alpha * A * conjugate(transpose(A)) + beta * C, or
C = alpha * conjugate(transpose(A)) * A + beta * C.
void
cublasZhpmv (char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex *AP, const cuDoubleComplex *x,
int incx, cuDoubleComplex beta, cuDoubleComplex *y, int incy)
performs the matrix-vector operation
y = alpha * A * x + beta * y
Alpha and beta are double precision complex scalars, and x and y are double
precision complex vectors with n elements.
void
cublasZhpr (char uplo, int n, double alpha, const cuDoubleComplex *x, int incx,
cuDoubleComplex *AP)
performs the hermitian rank 1 operation
A = alpha * x * conjugate(transpose(x)) + A,
where alpha is a double precision real scalar and x is an n element double
precision complex vector.
void
cublasZhpr2 (char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex *x, int incx,
const cuDoubleComplex *y, int incy, cuDoubleComplex *AP)
performs the hermitian rank 2 operation
A = alpha*x*conjugate(transpose(y)) + conjugate(alpha)*y*conjugate(transpose(x)) + A,
where alpha is a double precision complex scalar, and x and y are n element double
precision complex vectors.
cublasZrot (int n, cuDoubleComplex *x, int incx, cuDoubleComplex *y, int incy, double sc,
cuDoubleComplex cs)
multiplies the 2x2 matrix [ sc cs ; -conj(cs) sc ] with the 2xn matrix
[ transpose(x) ; transpose(y) ], where ';' separates matrix rows.
The elements of x are in x[lx + i * incx], i = 0 ... n - 1, where lx = 1 if
incx >= 0, else lx = 1 + (1 - n) * incx, and similarly for y using ly and
incy.
void
cublasZswap (int n, cuDoubleComplex *x, int incx, cuDoubleComplex *y, int incy)
interchanges the double-complex vector x with the double-complex vector y.
void
cublasZsymm (char side, char uplo, int m, int n, cuDoubleComplex alpha,
const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
cuDoubleComplex beta, cuDoubleComplex *C, int ldc);
performs one of the matrix-matrix operations
C = alpha * A * B + beta * C, or
C = alpha * B * A + beta * C,
where alpha and beta are double precision complex scalars, A is a symmetric matrix
consisting of double precision complex elements and stored in either lower or upper
storage mode, and B and C are m x n matrices consisting of double precision
complex elements.
void
cublasZsyr2k (char uplo, char trans, int n, int k, cuDoubleComplex alpha,
const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
cuDoubleComplex beta, cuDoubleComplex *C, int ldc)
performs one of the symmetric rank 2k operations
C = alpha * A * transpose(B) + alpha * B * transpose(A) + beta * C, or
C = alpha * transpose(A) * B + alpha * transpose(B) * A + beta * C.
void
cublasZsyrk (char uplo, char trans, int n, int k, cuDoubleComplex alpha,
const cuDoubleComplex *A, int lda, cuDoubleComplex beta, cuDoubleComplex *C, int ldc)
performs one of the symmetric rank k operations
C = alpha * A * transpose(A) + beta * C, or
C = alpha * transpose(A) * A + beta * C.
void
cublasZtbmv (char uplo, char trans, char diag, int n, int k, const cuDoubleComplex *A,
int lda, cuDoubleComplex *x, int incx)
performs one of the matrix-vector operations x = op(A) * x, where op(A) = A,
op(A) = transpose(A) or op(A) = conjugate(transpose(A)). x is an n-element
double precision complex vector, and A is an n x n, unit or non-unit, upper
or lower triangular band matrix composed of double precision complex elements.
void cublasZtbsv (char uplo, char trans, char diag, int n, int k,
const cuDoubleComplex *A, int lda, cuDoubleComplex *X, int incx)
solves one of the systems of equations op(A)*x = b, where op(A) is either
op(A) = A , op(A) = transpose(A) or op(A) = conjugate(transpose(A)).
void
cublasZtpmv (char uplo, char trans, char diag, int n, const cuDoubleComplex *AP,
cuDoubleComplex *x, int incx);
performs one of the matrix-vector operations x = op(A) * x, where op(A) = A,
op(A) = transpose(A) or op(A) = conjugate(transpose(A)) . x is an n element
double precision complex vector, and A is an n x n, unit or non-unit, upper
or lower triangular matrix composed of double precision complex elements.
void
cublasZtpsv (char uplo, char trans, char diag, int n, const cuDoubleComplex *AP,
cuDoubleComplex *X, int incx)
solves one of the systems of equations op(A)*x = b, where op(A) is either
op(A) = A , op(A) = transpose(A) or op(A) = conjugate(transpose(A)). b and
x are n element complex vectors, and A is an n x n unit or non-unit,
upper or lower triangular matrix.
void
cublasZtrmm (char side, char uplo, char transa, char diag, int m, int n,
cuDoubleComplex alpha, const cuDoubleComplex *A, int lda, const cuDoubleComplex *B,
int ldb)
performs one of the matrix-matrix operations
B = alpha * op(A) * B, or B = alpha * B * op(A)
where alpha is a double-precision complex scalar, B is an m x n matrix composed
of double precision complex elements, and A is a unit or non-unit, upper or lower,
triangular matrix composed of double precision complex elements. op(A) is one of
op(A) = A , op(A) = transpose(A) or op(A) = conjugate(transpose(A))
Matrices A and B are stored in column major format, and lda and ldb are
the leading dimensions of the two-dimensional arrays that contain A and
B, respectively.
void
cublasZtrmv (char uplo, char trans, char diag, int n, const cuDoubleComplex *A,
int lda, cuDoubleComplex *x, int incx);
performs one of the matrix-vector operations x = op(A) * x,
where op(A) = A, or op(A) = transpose(A) or op(A) = conjugate(transpose(A)).
void
cublasZtrsm (char side, char uplo, char transa, char diag, int m, int n,
cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
cuDoubleComplex *B, int ldb)
solves one of the matrix equations
op(A) * X = alpha * B, or X * op(A) = alpha * B,
where alpha is a double precision complex scalar, and X and B are m x n matrices
that are composed of double precision complex elements.
void
cublasZtrsv (char uplo, char trans, char diag, int n, const cuDoubleComplex *A,
int lda, cuDoubleComplex *x, int incx)
solves a system of equations op(A) * x = b, where op(A) is either A,
transpose(A) or conjugate(transpose(A)). b and x are double precision
complex vectors consisting of n elements, and A is an n x n matrix
composed of a unit or non-unit, upper or lower triangular matrix.