Skip to main content

atomr_accel_cuda/completion/
sync.rs

1//! `SyncCompletion` (§5.10 `BlockingCompletion`) — blocks a dedicated
2//! thread in `cudaStreamSynchronize`. **Stub for F1.** Used by the
3//! deterministic-replay harness once that ships in B1.
4
5use std::sync::Arc;
6
7use futures_util::future::BoxFuture;
8use futures_util::FutureExt;
9
10use crate::error::GpuError;
11
12use super::CompletionStrategy;
13
/// Completion strategy that waits for GPU work by blocking a thread on
/// stream synchronisation.
///
/// The type is a stateless unit struct: every value is interchangeable, so
/// cloning it or building it through [`Default`] is free.
#[derive(Clone, Debug, Default)]
pub struct SyncCompletion;
16
17impl SyncCompletion {
18    pub fn new() -> Self {
19        Self
20    }
21}
22
23impl CompletionStrategy for SyncCompletion {
24    fn await_completion(
25        &self,
26        stream: &Arc<cudarc::driver::CudaStream>,
27    ) -> BoxFuture<'static, Result<(), GpuError>> {
28        let stream = stream.clone();
29        async move {
30            // tokio::task::spawn_blocking is the right tool here — we
31            // genuinely block waiting for the GPU. F1 keeps this minimal;
32            // the production path is HostFnCompletion.
33            tokio::task::spawn_blocking(move || stream.synchronize())
34                .await
35                .map_err(|e| GpuError::Driver(format!("sync-completion task: {e}")))?
36                .map_err(|e| GpuError::Driver(format!("cudaStreamSynchronize: {e}")))
37        }
38        .boxed()
39    }
40}