atomr_accel_cuda/pipeline/mod.rs
1//! Multi-stream pipeline pattern.
2//!
3//! Lets users wire `Source -> KernelStage -> ... -> Sink` such that
4//! stage K+1 begins as soon as stage K's GPU work is complete, with
5//! cross-stage handoff via [`cudarc::driver::CudaEvent`] — no host
6//! roundtrip between stages.
7//!
8//! # Stage shape
9//!
10//! Implement [`PipelineStage`] for any kernel-actor adapter:
11//!
12//! ```ignore
13//! struct BlasSgemmStage { /* ... */ }
14//! impl PipelineStage for BlasSgemmStage {
15//!     type In = (GpuRef<f32>, GpuRef<f32>);
16//!     type Out = GpuRef<f32>;
17//!     fn enqueue(
18//!         &mut self, stream, wait_for, (a, b)
19//!     ) -> Result<(CudaEvent, GpuRef<f32>), GpuError> {
20//!         if let Some(ev) = wait_for { stream.wait(ev)?; }
21//!         /* enqueue cuBLAS gemm via record-mode contract */
22//!         let ev = stream.record_event(None)?;
23//!         Ok((ev, c))
24//!     }
25//! }
26//! ```
27//!
28//! F2 ships the [`PipelineStage`] trait and a thin executor; the full
29//! `PipelineBuilder<I, O>` type-state DSL with Source / Sink wrappers
30//! lands in F3, once more concrete usage patterns demand it.
31
// Private submodules: they are declared without `pub`, so their items are
// not reachable by path from outside `pipeline`.
32mod executor;
33mod sink;
34mod stage;
35
// Re-export the public surface so callers import these names from
// `pipeline` directly rather than from the private submodules above.
36pub use executor::{run_pipeline, BoxedStage, PipelineExecutor, PipelineExecutorN, StageBox};
37pub use sink::{spawn_pipeline, PipelineSink, PipelineSource};
38pub use stage::PipelineStage;