atomr_accel_cuda/stream/mod.rs
1//! Stream allocation strategies (§5.7).
2//!
3//! `StreamAllocator` controls whether each `KernelActor` owns its own
4//! stream, shares from a pool, or runs everything on a single stream.
//! F1 ships only the `PerActor` default (zero contention, max
//! concurrency); the other strategies in this module (`PooledAllocator`
//! and `SingleStreamAllocator`) are present as trait impls so the surface
//! is exercised, but their full behaviour is F2 work.
8
9mod per_actor;
10mod pool;
11mod single;
12
13pub use per_actor::PerActorAllocator;
14pub use pool::PooledAllocator;
15pub use single::SingleStreamAllocator;
16
17use std::sync::Arc;
18
19/// Hints the allocator may use when assigning a stream. Forward-compatible
20/// — F1 ignores both fields, F2+ allocators (priority-pooled) consume
21/// them.
22#[derive(Debug, Clone, Copy)]
23pub struct ActorHints {
24 pub priority: Priority,
25 pub workload: WorkloadKind,
26}
27
28impl Default for ActorHints {
29 fn default() -> Self {
30 Self {
31 priority: Priority::Normal,
32 workload: WorkloadKind::ShortLatencyBound,
33 }
34 }
35}
36
/// Priority hint carried in `ActorHints::priority`.
///
/// Variants are declared from lowest to highest, so the derived
/// `PartialOrd`/`Ord` yield `Low < Normal < High`. Keep that declaration
/// order when adding variants, otherwise comparisons silently change.
/// `Hash` is derived so a priority can key a map or set.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Priority {
    /// Lowest priority.
    Low,
    /// Middle priority; the value used by `ActorHints::default()`.
    Normal,
    /// Highest priority.
    High,
}
43
/// Workload classification carried in `ActorHints::workload`.
///
/// NOTE(review): the exact scheduling meaning of each variant is defined
/// by the allocators that consume the hint (F2+), not in this module.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum WorkloadKind {
    /// The value used by `ActorHints::default()`.
    ShortLatencyBound,
    /// The alternative to `ShortLatencyBound`.
    LongThroughputBound,
}
49
50/// Pluggable stream allocator (§5.7). Implemented by `PerActorAllocator`
51/// (F1 default) and three stubs.
52pub trait StreamAllocator: Send + Sync {
53 /// Acquire a stream for a `KernelActor` that's just starting.
54 fn acquire(&self, hints: ActorHints) -> Arc<cudarc::driver::CudaStream>;
55
56 /// Release the stream when the `KernelActor` stops. Default no-op
57 /// because most strategies treat streams as owned by the allocator,
58 /// not the caller.
59 fn release(&self, _stream: Arc<cudarc::driver::CudaStream>) {}
60}