atomr_accel_cuda/device/alloc_msg.rs
1//! Typed-allocation + memcpy support types for `DeviceMsg`.
2//!
3//! F1 hard-coded `DeviceMsg::Allocate` to f32. F2 adds per-dtype
4//! variants ([`device_actor::DeviceMsg::AllocateF32`],
5//! `AllocateF64`, …). Each preserves `GpuRef<T>` static typing on the
6//! receive side — a runtime-tagged `DType` enum would erase that.
7//!
8//! Supported dtypes:
9//! - `f32`, `f64` — primary scientific computing types
10//! - `i8`, `i32`, `i64` — signed integer
11//! - `u8`, `u32`, `u64` — unsigned integer
12//! - `f16`, `bf16` — gated on the `f16` cargo feature
13
14use crate::host::PinnedBuf;
15
/// Host-side buffer surface. Owned `Vec<T>` for low-volume
/// convenience; [`PinnedBuf<T>`] for async-overlappable transfers
/// sourced from a [`crate::host::PinnedBufferPool`].
///
/// Both variants expose their length through [`HostBuf::len`], so callers
/// can size a transfer without matching on the variant themselves.
pub enum HostBuf<T> {
    /// Host data held in an ordinary `Vec<T>` — the low-volume
    /// convenience path.
    Owned(Vec<T>),
    /// Host data held in a [`PinnedBuf<T>`] — the path intended for
    /// async-overlappable transfers.
    Pinned(PinnedBuf<T>),
}
23
24impl<T> HostBuf<T> {
25 pub fn len(&self) -> usize {
26 match self {
27 HostBuf::Owned(v) => v.len(),
28 HostBuf::Pinned(p) => p.len(),
29 }
30 }
31
32 pub fn is_empty(&self) -> bool {
33 self.len() == 0
34 }
35}
36
37impl<T: std::fmt::Debug> std::fmt::Debug for HostBuf<T> {
38 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
39 match self {
40 HostBuf::Owned(v) => f.debug_tuple("HostBuf::Owned").field(&v.len()).finish(),
41 HostBuf::Pinned(p) => f.debug_tuple("HostBuf::Pinned").field(&p.len()).finish(),
42 }
43 }
44}
45
/// Per-device load snapshot returned by [`DeviceMsg::Stats`]. Used by
/// the F5 [`crate::placement::PlacementActor`] for least-loaded
/// scheduling.
///
/// The snapshot is a plain value type: it is `Copy`, and it derives
/// `PartialEq`/`Eq`/`Hash` so two snapshots can be compared directly
/// (e.g. in tests or change detection) or used as map/set keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct DeviceLoad {
    /// Bytes currently free (presumably device memory — confirm against
    /// the code that fills this snapshot).
    pub free_bytes: usize,
    /// Total bytes available (presumably total device memory — confirm).
    pub total_bytes: usize,
    /// Number of active streams at snapshot time (presumably CUDA
    /// streams — confirm).
    pub active_streams: u32,
    /// Depth of the device's work queue at snapshot time (exact queue
    /// not visible here — confirm against the producer).
    pub queue_depth: u32,
    /// Compute capability as a pair (presumably `(major, minor)` — confirm).
    pub compute_cap: (i32, i32),
}