Skip to main content

atomr_accel_cuda/device/
alloc_msg.rs

//! Typed-allocation + memcpy support types for `DeviceMsg`.
//!
//! F1 hard-coded `DeviceMsg::Allocate` to f32. F2 adds per-dtype
//! variants ([`device_actor::DeviceMsg::AllocateF32`],
//! `AllocateF64`, …). Each preserves `GpuRef<T>` static typing on the
//! receive side — a runtime-tagged `DType` enum would erase that.
//!
//! Supported dtypes:
//! - `f32`, `f64` — primary scientific computing types
//! - `i8`, `i32`, `i64` — signed integer
//! - `u8`, `u32`, `u64` — unsigned integer
//! - `f16`, `bf16` — gated on the `f16` cargo feature
13
14use crate::host::PinnedBuf;
15
16/// Host-side buffer surface. Owned `Vec<T>` for low-volume
17/// convenience; [`PinnedBuf<T>`] for async-overlappable transfers
18/// sourced from a [`crate::host::PinnedBufferPool`].
19pub enum HostBuf<T> {
20    Owned(Vec<T>),
21    Pinned(PinnedBuf<T>),
22}
23
24impl<T> HostBuf<T> {
25    pub fn len(&self) -> usize {
26        match self {
27            HostBuf::Owned(v) => v.len(),
28            HostBuf::Pinned(p) => p.len(),
29        }
30    }
31
32    pub fn is_empty(&self) -> bool {
33        self.len() == 0
34    }
35}
36
37impl<T: std::fmt::Debug> std::fmt::Debug for HostBuf<T> {
38    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
39        match self {
40            HostBuf::Owned(v) => f.debug_tuple("HostBuf::Owned").field(&v.len()).finish(),
41            HostBuf::Pinned(p) => f.debug_tuple("HostBuf::Pinned").field(&p.len()).finish(),
42        }
43    }
44}
45
/// Point-in-time load report for a single device, produced in reply to
/// [`DeviceMsg::Stats`].
///
/// The F5 [`crate::placement::PlacementActor`] consumes these snapshots
/// to pick the least-loaded device when scheduling.
#[derive(Clone, Copy, Debug)]
pub struct DeviceLoad {
    /// Bytes currently free (presumably device memory — confirm against
    /// the code that fills this in).
    pub free_bytes: usize,
    /// Total bytes (presumably device memory capacity — confirm against
    /// the code that fills this in).
    pub total_bytes: usize,
    /// Count of streams considered active at snapshot time.
    pub active_streams: u32,
    /// Depth of the pending-work queue at snapshot time.
    pub queue_depth: u32,
    /// Compute capability pair; presumably `(major, minor)` — TODO confirm.
    pub compute_cap: (i32, i32),
}