atomr_accel_cuda/pipeline/stage.rs
1//! [`PipelineStage`] trait.
2
3use std::sync::Arc;
4
5use cudarc::driver::{CudaEvent, CudaStream};
6
7use crate::error::GpuError;
8
9/// One stage in a multi-stream GPU pipeline.
10///
11/// Implementations enqueue their kernel onto `stream` synchronously
12/// (no host wait) and return a `CudaEvent` marking the completion of
13/// that stage's GPU work, plus the typed output. The executor
14/// arranges that the next stage's `wait_for` is the previous stage's
15/// returned event, so cross-stage synchronization is on-device only.
16pub trait PipelineStage: Send + 'static {
17 type In: Send + 'static;
18 type Out: Send + 'static;
19
20 fn enqueue(
21 &mut self,
22 stream: &Arc<CudaStream>,
23 wait_for: Option<&CudaEvent>,
24 input: Self::In,
25 ) -> Result<(CudaEvent, Self::Out), GpuError>;
26}