1pub use crate::completion::{CompletionStrategy, HostFnCompletion};
4pub use crate::device::{
5 ContextActor, ContextMsg, DeviceActor, DeviceConfig, DeviceLoad, DeviceMsg, DeviceState,
6 EnabledLibraries, HostBuf, KernelChildren, SgemmRequest,
7};
8pub use crate::dispatcher::GpuDispatcher;
9pub use crate::error::{decider, device_supervisor_strategy, DeviceSupervisor, GpuError};
10pub use crate::gpu_ref::GpuRef;
11#[cfg(feature = "cufft")]
12pub use crate::graph::FftR2COp;
13#[allow(deprecated)]
14pub use crate::graph::GraphOpLegacy;
15#[cfg(feature = "curand")]
16pub use crate::graph::RngFillUniformOp;
17pub use crate::graph::{
18 GraphActor, GraphHandle, GraphMsg, GraphOp, GraphRecordCtx, MemcpyOp, SgemmOp,
19};
20pub use crate::host::{
21 PinnedBuf, PinnedBufferPool, PinnedBufferPoolConfig, PinnedPoolMsg, PinnedPoolStats,
22};
23#[cfg(feature = "cublaslt")]
24pub use crate::kernel::dispatch::{BlasLtDispatch, BlasLtDispatchCtx};
25#[cfg(feature = "nccl")]
26pub use crate::kernel::dispatch::{CollectiveDispatch, CollectiveDispatchCtx};
27#[cfg(feature = "cudnn")]
28pub use crate::kernel::dispatch::{CudnnDispatch, CudnnDispatchCtx};
29pub use crate::kernel::dispatch::{
30 DevSliceArg, GemmDispatch, GemmDispatchCtx, NvrtcDispatchCtx, NvrtcLaunchDispatch, RngDispatch,
31 ScalarArg,
32};
33#[cfg(feature = "cufft")]
34pub use crate::kernel::dispatch::{FftDispatch, FftDispatchCtx};
35#[cfg(feature = "cusparse")]
36pub use crate::kernel::dispatch::{SendSparseHandle, SparseDispatch, SparseDispatchCtx, SparseOp};
37#[cfg(feature = "cutensor")]
38pub use crate::kernel::dispatch::{TensorDispatch, TensorDispatchCtx};
39pub use crate::kernel::envelope;
40pub use crate::kernel::record::RecordMode;
41pub use crate::kernel::{BlasActor, BlasMsg};
42pub use crate::memory::{
43 ManagedAllocatorActor, ManagedFlags, ManagedMsg, ManagedRef, ManagedStats,
44};
45pub use crate::p2p::{P2pGraph, P2pMsg, P2pTopology};
46pub use crate::pipeline::{
47 run_pipeline, spawn_pipeline, BoxedStage, PipelineExecutor, PipelineExecutorN, PipelineSink,
48 PipelineSource, PipelineStage, StageBox,
49};
50pub use crate::placement::{
51 DeviceChoice, LeastLoadedPolicy, PlacementActor, PlacementHints, PlacementMsg, PlacementPolicy,
52 RoundRobinPolicy,
53};
54pub use crate::replay::{
55 replay_via_sink, JournalEntry, ReplayHarness, ReplayMode, ReplayMsg, ReplaySink,
56};
57
58#[cfg(feature = "cusolver")]
59pub use crate::dtype::{CudaDtype, SolverSupported};
60#[cfg(feature = "cusolver")]
61pub use crate::kernel::{
62 CholeskyRequest, GesvdjBatchedRequest, GetrfBatchedRequest, HegvdRequest, LuRequest,
63 LuSolveRequest, PotrfBatchedRequest, QrRequest, SolverActor, SolverDispatch, SolverMsg,
64 SvdRequest, SyevdRequest, SygvdRequest, Uplo,
65};
66#[cfg(all(feature = "cusolver", feature = "cusolver-sp"))]
67pub use crate::kernel::{SparseCholeskyRequest, SparseLuRequest, SparseQrRequest};
68
69#[cfg(feature = "cusparse")]
70pub use crate::kernel::{CsrMatrix, SparseActor, SparseMsg};
71
72#[cfg(feature = "cutensor")]
73pub use crate::dtype::TensorSupported;
74#[cfg(feature = "cutensor")]
75pub use crate::kernel::{
76 ComputeDesc, ContractRequest, ElementwiseBinaryRequest, ElementwiseTrinaryRequest, OperandSpec,
77 PermutationRequest, ReductionRequest, TensorActor, TensorMsg, TensorSpec,
78};
79
80#[cfg(feature = "cublaslt")]
81pub use crate::kernel::{
82 Activation, BlasLtActor, BlasLtMsg, BlasLtWorkspacePool, Epilogue, HeuristicCacheRef,
83 MatmulRequest, ScaleSet,
84};
85
86#[cfg(feature = "nvrtc")]
87pub use crate::kernel::{KernelArg, KernelHandle, NvrtcActor, NvrtcMsg, NvrtcOpts};
88
89#[cfg(feature = "nccl")]
90pub use crate::kernel::{
91 AllGatherRequest, AllReduceRequest, AllToAllRequest, AllToAllvRequest, BroadcastRequest,
92 CollectiveActor, CollectiveMsg, GroupGuard, NcclCapabilities, NcclReduceSupported, PreMulSumOp,
93 RecvRequest, ReduceOp, ReduceRequest, ReduceScatterRequest, SendRequest,
94};
95#[cfg(feature = "nccl")]
96pub use crate::multi_device::{NcclWorldActor, NcclWorldConfig, NcclWorldMsg};
97pub use crate::stream::{
98 ActorHints, PerActorAllocator, PooledAllocator, Priority, SingleStreamAllocator,
99 StreamAllocator, WorkloadKind,
100};
101
102#[cfg(feature = "cudnn")]
103pub use crate::kernel::{
104 ActivationFwdRequest, ActivationKind, ActivationRequest, AttentionMask, AttentionParams,
105 BatchNormRequest, ConvBwdDataRequest, ConvBwdFilterRequest, ConvDescParams, ConvForwardRequest,
106 ConvFwdRequest, ConvParams, CudnnActor, CudnnMsg, DropoutFwdRequest, EpilogueKind,
107 GroupNormRequest, InstanceNormRequest, LayerNormRequest, LrnFwdRequest, LrnParams,
108 MultiHeadAttnBwdRequest, MultiHeadAttnFwdRequest, NormBwdRequest, NormMode, NormPhase,
109 PoolBwdRequest, PoolFwdRequest, PoolMode, PoolParams, RnnBwdRequest, RnnDirection,
110 RnnFwdRequest, RnnMode, RnnParams, SoftmaxFwdRequest, SoftmaxMode, SoftmaxRequest,
111 TensorLayout,
112};
113
114#[cfg(feature = "cufft")]
115pub use crate::kernel::{
116 FftActor, FftCallbackKind, FftDirection, FftKind, FftMsg, FftPlan, FftPlanMany, FftRequest,
117 PlanKey,
118};
119
120#[cfg(feature = "curand")]
121pub use crate::kernel::{Distribution, FillRequest, RngActor, RngGeneratorKind, RngMsg};
122
123#[cfg(feature = "tensorrt")]
125pub use atomr_accel_tensorrt as tensorrt;
126
127#[cfg(feature = "cutlass")]
129pub use atomr_accel_cutlass as cutlass;
130
/// Namespace that re-exports items from the `atomr_accel_flashattn` crate.
///
/// The module is compiled only when the `flashattn` feature is enabled; two
/// further groups of names inside it are gated on `flashattn-fp8` and
/// `flashattn-paged` respectively.
#[cfg(feature = "flashattn")]
pub mod flashattn {
    // Names ending in `Request`.
    pub use atomr_accel_flashattn::{
        ChunkedPrefillRequest, Fa2BwdRequest, Fa2FwdRequest, Fa3FwdRequest, VarlenFwdRequest,
    };

    // Dispatch-named items, together with the `DISPATCH_TABLE` item.
    pub use atomr_accel_flashattn::{
        DispatchError, DispatchKey, DispatchTable, FaBwdDispatch, FaFwdDispatch,
        FaPagedFwdDispatch, DISPATCH_TABLE,
    };

    // Names carrying the `FlashAttn` prefix.
    pub use atomr_accel_flashattn::{
        FlashAttnActor, FlashAttnError, FlashAttnInner, FlashAttnMsg, FlashAttnProps,
    };

    // Every other unconditionally re-exported name.
    pub use atomr_accel_flashattn::{
        ChunkLayout, CumulativeSeqlens, DType, GemmSupported, MaskKind, PersistentMode,
        PositionBias, SmArch,
    };

    // Available only with the `flashattn-fp8` feature.
    #[cfg(feature = "flashattn-fp8")]
    pub use atomr_accel_flashattn::{F8E4m3, F8E5m2, Fa3FwdFp8Request};

    // Available only with the `flashattn-paged` feature.
    #[cfg(feature = "flashattn-paged")]
    pub use atomr_accel_flashattn::{PagedAttentionRequest, PagedKvCache};
}
148
149#[cfg(feature = "nvtx-trace")]
151pub use atomr_accel_telemetry::nvtx::{Domain as NvtxDomain, NvtxKernelTrace};
152
153#[cfg(feature = "nvml")]
154pub use atomr_accel_telemetry::nvml::{
155 register_all as register_nvml_probes, NvmlActor, NvmlConfig, NvmlError, NvmlMsg, NvmlReply,
156 NvmlSnapshot, ProbeRegistration as NvmlProbeRegistration,
157};
158
159#[cfg(feature = "cupti")]
160pub use atomr_accel_telemetry::cupti::{
161 Activity, ActivityCategory, CuptiBootstrap, CuptiError, CuptiMsg, CuptiReply, CuptiSession,
162};
163
164#[cfg(any(feature = "nvtx-trace", feature = "nvml", feature = "cupti"))]
165pub use atomr_accel_telemetry::{KernelInfo, KernelTrace, NoopKernelTrace};