#include <jitify.hpp>
|
| KernelInstantiation (Kernel const &kernel, std::vector< std::string > const &template_args) |
|
| operator CUfunction () const |
|
std::string | serialize () const |
|
KernelLauncher | configure (dim3 grid, dim3 block, unsigned int smem=0, cudaStream_t stream=0) const |
|
KernelLauncher | configure_1d_max_occupancy (int max_block_size=0, unsigned int smem=0, CUoccupancyB2DSize smem_callback=0, cudaStream_t stream=0, unsigned int flags=0) const |
|
int | get_func_attribute (CUfunction_attribute attribute) const |
|
void | set_func_attribute (CUfunction_attribute attribute, int value) const |
|
CUdeviceptr | get_constant_ptr (const char *name, size_t *size=nullptr) const |
|
CUdeviceptr | get_global_ptr (const char *name, size_t *size=nullptr) const |
|
template<typename T > |
CUresult | get_global_array (const char *name, T *data, size_t count, CUstream stream=0) const |
|
template<typename T > |
CUresult | get_global_value (const char *name, T *value, CUstream stream=0) const |
|
template<typename T > |
CUresult | set_global_array (const char *name, const T *data, size_t count, CUstream stream=0) const |
|
template<typename T > |
CUresult | set_global_value (const char *name, const T &value, CUstream stream=0) const |
|
const std::string & | mangled_name () const |
|
const std::string & | ptx () const |
|
const std::vector< std::string > & | link_files () const |
|
const std::vector< std::string > & | link_paths () const |
|
◆ KernelInstantiation()
jitify::experimental::KernelInstantiation::KernelInstantiation |
( |
Kernel const & |
kernel, |
|
|
std::vector< std::string > const & |
template_args |
|
) |
| |
|
inline |
◆ configure()
KernelLauncher jitify::experimental::KernelInstantiation::configure |
( |
dim3 |
grid, |
|
|
dim3 |
block, |
|
|
unsigned int |
smem = 0 , |
|
|
cudaStream_t |
stream = 0 |
|
) |
| const |
|
inline |
Configure the kernel launch.
- Parameters
-
grid | The thread grid dimensions for the launch. |
block | The thread block dimensions for the launch. |
smem | The amount of shared memory to dynamically allocate, in bytes. |
stream | The CUDA stream to launch the kernel in. |
References KernelLauncher.
Referenced by configure_1d_max_occupancy().
◆ configure_1d_max_occupancy()
KernelLauncher jitify::experimental::KernelInstantiation::configure_1d_max_occupancy |
( |
int |
max_block_size = 0 , |
|
|
unsigned int |
smem = 0 , |
|
|
CUoccupancyB2DSize |
smem_callback = 0 , |
|
|
cudaStream_t |
stream = 0 , |
|
|
unsigned int |
flags = 0 |
|
) |
| const |
|
inline |
Configure the kernel launch with a 1-dimensional block and grid chosen automatically to maximise occupancy.
- Parameters
-
max_block_size | The upper limit on the block size, or 0 for no limit. |
smem | The amount of shared memory to dynamically allocate, in bytes. |
smem_callback | A function returning the dynamic shared memory size, in bytes, for a given block size (overrides smem). |
stream | The CUDA stream to launch the kernel in. |
flags | The flags to pass to cuOccupancyMaxPotentialBlockSizeWithFlags. |
References configure().
◆ deserialize()
static KernelInstantiation jitify::experimental::KernelInstantiation::deserialize |
( |
std::string const & |
serialized_kernel_inst | ) |
|
|
inline static
◆ get_constant_ptr()
CUdeviceptr jitify::experimental::KernelInstantiation::get_constant_ptr |
( |
const char * |
name, |
|
|
size_t * |
size = nullptr |
|
) |
| const |
|
inline |
◆ get_func_attribute()
int jitify::experimental::KernelInstantiation::get_func_attribute |
( |
CUfunction_attribute |
attribute | ) |
const |
|
inline |
◆ get_global_array()
template<typename T >
CUresult jitify::experimental::KernelInstantiation::get_global_array |
( |
const char * |
name, |
|
|
T * |
data, |
|
|
size_t |
count, |
|
|
CUstream |
stream = 0 |
|
) |
| const |
|
inline |
◆ get_global_ptr()
CUdeviceptr jitify::experimental::KernelInstantiation::get_global_ptr |
( |
const char * |
name, |
|
|
size_t * |
size = nullptr |
|
) |
| const |
|
inline |
◆ get_global_value()
template<typename T >
CUresult jitify::experimental::KernelInstantiation::get_global_value |
( |
const char * |
name, |
|
|
T * |
value, |
|
|
CUstream |
stream = 0 |
|
) |
| const |
|
inline |
◆ link_files()
const std::vector<std::string>& jitify::experimental::KernelInstantiation::link_files |
( |
| ) |
const |
|
inline |
◆ link_paths()
const std::vector<std::string>& jitify::experimental::KernelInstantiation::link_paths |
( |
| ) |
const |
|
inline |
◆ mangled_name()
const std::string& jitify::experimental::KernelInstantiation::mangled_name |
( |
| ) |
const |
|
inline |
◆ operator CUfunction()
jitify::experimental::KernelInstantiation::operator CUfunction |
( |
| ) |
const |
|
inline |
Implicit conversion to the underlying CUfunction object.
- Note
- This allows a KernelInstantiation to be passed directly to CUDA driver APIs that take a CUfunction, such as cuOccupancyMaxActiveBlocksPerMultiprocessor.
◆ ptx()
const std::string& jitify::experimental::KernelInstantiation::ptx |
( |
| ) |
const |
|
inline |
◆ serialize()
std::string jitify::experimental::KernelInstantiation::serialize |
( |
| ) |
const |
|
inline |
◆ set_func_attribute()
void jitify::experimental::KernelInstantiation::set_func_attribute |
( |
CUfunction_attribute |
attribute, |
|
|
int |
value |
|
) |
| const |
|
inline |
◆ set_global_array()
template<typename T >
CUresult jitify::experimental::KernelInstantiation::set_global_array |
( |
const char * |
name, |
|
|
const T * |
data, |
|
|
size_t |
count, |
|
|
CUstream |
stream = 0 |
|
) |
| const |
|
inline |
◆ set_global_value()
template<typename T >
CUresult jitify::experimental::KernelInstantiation::set_global_value |
( |
const char * |
name, |
|
|
const T & |
value, |
|
|
CUstream |
stream = 0 |
|
) |
| const |
|
inline |
◆ KernelLauncher
The documentation for this class was generated from the following file:
- /nebulastream/nes-runtime/include/Util/jitify/jitify.hpp