pub fn launch_kernel( f: CUfunction, grid: (u32, u32, u32), block: (u32, u32, u32), shared_bytes: u32, stream: CUstream, kernel_params: *mut *mut c_void, ) -> Result<(), GpuError>