Skip to main content

atomr_accel_cuda/completion/
mod.rs

1//! Completion strategies (§5.10).
2//!
3//! `CompletionStrategy` decides how the runtime detects that a stream
4//! has finished its outstanding work. The F1 default is
5//! `HostFnCompletion`, which uses `cuLaunchHostFunc` to schedule a
6//! callback that wakes a Tokio waker — sub-microsecond latency, no
7//! polling overhead, scales to many concurrent operations.
8//!
9//! Two fallback strategies are present as stubs for F2: `PolledCompletion`
10//! for environments that block host-functions, and `SyncCompletion` for
11//! debugging / deterministic-replay testing.
12
13mod host_fn;
14mod poll;
15mod sync;
16
17pub use host_fn::HostFnCompletion;
18pub use poll::PolledCompletion;
19pub use sync::SyncCompletion;
20
21use std::sync::Arc;
22
23use futures_util::future::BoxFuture;
24
25use crate::error::GpuError;
26
27pub trait CompletionStrategy: Send + Sync {
28    /// Return a future that resolves when all preceding work on `stream`
29    /// has completed. Implementations differ in how completion is
30    /// detected.
31    fn await_completion(
32        &self,
33        stream: &Arc<cudarc::driver::CudaStream>,
34    ) -> BoxFuture<'static, Result<(), GpuError>>;
35}