#include <jitify.hpp>

Public Member Functions
	KernelInstantiation (Kernel const &kernel, std::vector< std::string > const &template_args)

	operator CUfunction () const

std::string	serialize () const

KernelLauncher	configure (dim3 grid, dim3 block, unsigned int smem=0, cudaStream_t stream=0) const

KernelLauncher	configure_1d_max_occupancy (int max_block_size=0, unsigned int smem=0, CUoccupancyB2DSize smem_callback=0, cudaStream_t stream=0, unsigned int flags=0) const

int	get_func_attribute (CUfunction_attribute attribute) const

void	set_func_attribute (CUfunction_attribute attribute, int value) const

CUdeviceptr	get_constant_ptr (const char *name, size_t *size=nullptr) const

CUdeviceptr	get_global_ptr (const char *name, size_t *size=nullptr) const

template<typename T >
CUresult	get_global_array (const char *name, T *data, size_t count, CUstream stream=0) const

template<typename T >
CUresult	get_global_value (const char *name, T *value, CUstream stream=0) const

template<typename T >
CUresult	set_global_array (const char *name, const T *data, size_t count, CUstream stream=0) const

template<typename T >
CUresult	set_global_value (const char *name, const T &value, CUstream stream=0) const

const std::string &	mangled_name () const

const std::string &	ptx () const

const std::vector< std::string > &	link_files () const

const std::vector< std::string > &	link_paths () const

Static Public Member Functions
static KernelInstantiation	deserialize (std::string const &serialized_kernel_inst)

Friends
class	KernelLauncher

Constructor & Destructor Documentation

◆ KernelInstantiation()

jitify::experimental::KernelInstantiation::KernelInstantiation	(	Kernel const &	kernel,
		std::vector< std::string > const &	template_args
	)

inline

References ptx().

Here is the call graph for this function:

Member Function Documentation

◆ configure()

KernelLauncher jitify::experimental::KernelInstantiation::configure	(	dim3	grid,
		dim3	block,
		unsigned int	smem = `0`,
		cudaStream_t	stream = `0`
	)		const

inline

Configure the kernel launch.

Parameters

grid	The thread grid dimensions for the launch.
block	The thread block dimensions for the launch.
smem	The amount of shared memory to dynamically allocate, in bytes.
stream	The CUDA stream to launch the kernel in.

References KernelLauncher.

Referenced by configure_1d_max_occupancy().

Here is the caller graph for this function:

◆ configure_1d_max_occupancy()

KernelLauncher jitify::experimental::KernelInstantiation::configure_1d_max_occupancy	(	int	max_block_size = `0`,
		unsigned int	smem = `0`,
		CUoccupancyB2DSize	smem_callback = `0`,
		cudaStream_t	stream = `0`,
		unsigned int	flags = `0`
	)		const

inline

Configure the kernel launch with a 1-dimensional block and grid chosen automatically to maximise occupancy.

Parameters

max_block_size	The upper limit on the block size, or 0 for no limit.
smem	The amount of shared memory to dynamically allocate, in bytes.
smem_callback	A function returning smem for a given block size (overrides `smem`).
stream	The CUDA stream to launch the kernel in.
flags	The flags to pass to cuOccupancyMaxPotentialBlockSizeWithFlags.

References configure().

Here is the call graph for this function:

◆ deserialize()

static KernelInstantiation jitify::experimental::KernelInstantiation::deserialize ( std::string const & serialized_kernel_inst )

inline static

Restore a serialized kernel instantiation.

Parameters

serialized_kernel_inst The serialized kernel instantiation to restore.

See also: serialize

References jitify::experimental::serialization::deserialize(), link_files(), link_paths(), and ptx().

Here is the call graph for this function:

◆ get_constant_ptr()

CUdeviceptr jitify::experimental::KernelInstantiation::get_constant_ptr	(	const char *	name,
		size_t *	size = `nullptr`
	)		const

inline

References get_global_ptr(), and size().

Here is the call graph for this function:

◆ get_func_attribute()

int jitify::experimental::KernelInstantiation::get_func_attribute ( CUfunction_attribute attribute ) const

inline

◆ get_global_array()

template<typename T >

CUresult jitify::experimental::KernelInstantiation::get_global_array	(	const char *	name,
		T *	data,
		size_t	count,
		CUstream	stream = `0`
	)		const

inline

References data.

Referenced by get_global_value().

Here is the caller graph for this function:

◆ get_global_ptr()

CUdeviceptr jitify::experimental::KernelInstantiation::get_global_ptr	(	const char *	name,
		size_t *	size = `nullptr`
	)		const

inline

References size().

Referenced by get_constant_ptr().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ get_global_value()

template<typename T >

CUresult jitify::experimental::KernelInstantiation::get_global_value	(	const char *	name,
		T *	value,
		CUstream	stream = `0`
	)		const

inline

References get_global_array(), and magic_enum::detail::value().

Here is the call graph for this function:

◆ link_files()

const std::vector<std::string>& jitify::experimental::KernelInstantiation::link_files ( ) const

inline

Referenced by deserialize().

Here is the caller graph for this function:

◆ link_paths()

const std::vector<std::string>& jitify::experimental::KernelInstantiation::link_paths ( ) const

inline

Referenced by deserialize().

Here is the caller graph for this function:

◆ mangled_name()

const std::string& jitify::experimental::KernelInstantiation::mangled_name ( ) const

inline

◆ operator CUfunction()

jitify::experimental::KernelInstantiation::operator CUfunction ( ) const

inline

Implicit conversion to the underlying CUfunction object.

Note: This allows use of CUDA APIs like cuOccupancyMaxActiveBlocksPerMultiprocessor.

◆ ptx()

const std::string& jitify::experimental::KernelInstantiation::ptx ( ) const

inline

Referenced by deserialize(), and KernelInstantiation().

Here is the caller graph for this function:

◆ serialize()

std::string jitify::experimental::KernelInstantiation::serialize ( ) const

inline

Save (serialize) the kernel instantiation to a string.

See also: deserialize

References jitify::experimental::serialization::serialize().

Here is the call graph for this function:

◆ set_func_attribute()

void jitify::experimental::KernelInstantiation::set_func_attribute	(	CUfunction_attribute	attribute,
		int	value
	)		const

inline

References magic_enum::detail::value().

Here is the call graph for this function:

◆ set_global_array()

template<typename T >

CUresult jitify::experimental::KernelInstantiation::set_global_array	(	const char *	name,
		const T *	data,
		size_t	count,
		CUstream	stream = `0`
	)		const

inline

References data.

Referenced by set_global_value().

Here is the caller graph for this function:

◆ set_global_value()

template<typename T >

CUresult jitify::experimental::KernelInstantiation::set_global_value	(	const char *	name,
		const T &	value,
		CUstream	stream = `0`
	)		const

inline

References set_global_array(), and magic_enum::detail::value().

Here is the call graph for this function:

Friends And Related Function Documentation

◆ KernelLauncher

friend class KernelLauncher

friend

Referenced by configure().

The documentation for this class was generated from the following file:

/nebulastream/nes-runtime/include/Util/jitify/jitify.hpp

Public Member Functions

Static Public Member Functions

Friends

Constructor & Destructor Documentation

◆ KernelInstantiation()

Member Function Documentation

◆ configure()

◆ configure_1d_max_occupancy()

◆ deserialize()

◆ get_constant_ptr()

◆ get_func_attribute()

◆ get_global_array()

◆ get_global_ptr()

◆ get_global_value()

◆ link_files()

◆ link_paths()

◆ mangled_name()

◆ operator CUfunction()

◆ ptx()

◆ serialize()

◆ set_func_attribute()

◆ set_global_array()

◆ set_global_value()

Friends And Related Function Documentation

◆ KernelLauncher