Skip to main content

atomr_accel_cuda/memory/
ipc.rs

//! `cuIpcGetMemHandle` / `cuIpcOpenMemHandle` / `cuIpcCloseMemHandle`
//! wrappers (gated behind the `cuda-ipc` feature).
//!
//! IPC mem handles let one CUDA process expose a device allocation to
//! another for zero-copy sharing. The 64-byte payload is opaque; ship
//! it via your application's IPC channel (Unix socket, shared file,
//! gRPC, etc.) and reopen it on the destination.
//!
//! Lifecycle notes:
//! - Both processes must use the same CUDA driver and a peer-capable
//!   GPU pair.
//! - `OpenedMem` owns the lifetime of the imported pointer; `Drop`
//!   calls `cuIpcCloseMemHandle`.
//! - The exporter must outlive the importer's use of the handle —
//!   freeing the source allocation while the importer still holds an
//!   open handle is undefined behavior.
17
18#![cfg(feature = "cuda-ipc")]
19
20use cudarc::driver::sys as driver_sys;
21
22use crate::error::GpuError;
23use crate::sys::cuda_driver;
24
25/// Cross-process IPC handle for a memory range. 64 bytes of opaque
26/// payload — interpret only by re-opening on the destination.
27#[derive(Clone, Copy)]
28pub struct IpcMemHandle {
29    pub(crate) raw: driver_sys::CUipcMemHandle,
30}
31
32impl IpcMemHandle {
33    pub fn as_bytes(&self) -> [u8; 64] {
34        // SAFETY: `[c_char; 64]` and `[u8; 64]` share layout.
35        unsafe { std::mem::transmute::<[std::ffi::c_char; 64], [u8; 64]>(self.raw.reserved) }
36    }
37
38    pub fn from_bytes(bytes: [u8; 64]) -> Self {
39        let raw = driver_sys::CUipcMemHandle_st {
40            // SAFETY: layout-compatible.
41            reserved: unsafe { std::mem::transmute::<[u8; 64], [std::ffi::c_char; 64]>(bytes) },
42        };
43        Self { raw }
44    }
45}
46
47impl std::fmt::Debug for IpcMemHandle {
48    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
49        f.debug_struct("IpcMemHandle").finish()
50    }
51}
52
53unsafe impl Send for IpcMemHandle {}
54unsafe impl Sync for IpcMemHandle {}
55
56/// Imported memory handle. `Drop` releases the mapping via
57/// `cuIpcCloseMemHandle`. Cloning is not supported — only one
58/// `OpenedMem` may exist per import to keep the `Drop` semantics
59/// straightforward.
60#[derive(Debug)]
61pub struct OpenedMem {
62    dev_ptr: driver_sys::CUdeviceptr,
63    bytes: usize,
64}
65
66impl OpenedMem {
67    pub fn dev_ptr(&self) -> driver_sys::CUdeviceptr {
68        self.dev_ptr
69    }
70
71    pub fn bytes(&self) -> usize {
72        self.bytes
73    }
74}
75
76impl Drop for OpenedMem {
77    fn drop(&mut self) {
78        if self.dev_ptr != 0 {
79            let _ = cuda_driver::ipc_close_mem_handle(self.dev_ptr);
80        }
81    }
82}
83
84unsafe impl Send for OpenedMem {}
85unsafe impl Sync for OpenedMem {}
86
87/// Export an IPC handle for a device allocation.
88pub fn get_mem_handle(dev_ptr: driver_sys::CUdeviceptr) -> Result<IpcMemHandle, GpuError> {
89    cuda_driver::ipc_get_mem_handle(dev_ptr).map(|raw| IpcMemHandle { raw })
90}
91
92/// Open a previously-exported IPC handle.
93///
94/// `bytes` is the original allocation size; we pass it through to
95/// `OpenedMem` so callers can build a typed slice on top.
96pub fn open_mem_handle(handle: IpcMemHandle, bytes: usize) -> Result<OpenedMem, GpuError> {
97    let dev_ptr = cuda_driver::ipc_open_mem_handle_v2(
98        handle.raw, // CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS = 1
99        1,
100    )?;
101    Ok(OpenedMem { dev_ptr, bytes })
102}
103
#[cfg(test)]
mod tests {
    use super::*;

    /// Compile-time check that `T` is both `Send` and `Sync`.
    fn assert_send_sync<T: Send + Sync>() {}

    /// A payload fed through `from_bytes` comes back unchanged from
    /// `as_bytes`, and both public types are `Send + Sync`.
    #[test]
    fn handle_round_trip() {
        let payload: [u8; 64] = std::array::from_fn(|i| (i * 3) as u8 ^ 0x55);
        let recovered = IpcMemHandle::from_bytes(payload).as_bytes();
        assert_eq!(recovered, payload);

        assert_send_sync::<IpcMemHandle>();
        assert_send_sync::<OpenedMem>();
    }

    /// Opening an all-zero handle must either succeed or fail with one of
    /// the two expected error variants; any other outcome is a test failure.
    #[test]
    fn open_returns_typed_error_on_no_driver() {
        let zero_handle = IpcMemHandle::from_bytes([0u8; 64]);
        let outcome = open_mem_handle(zero_handle, 0);
        let acceptable = matches!(
            outcome,
            Ok(_) | Err(GpuError::Unrecoverable(_)) | Err(GpuError::LibraryError { .. })
        );
        assert!(acceptable, "unexpected: {outcome:?}");
    }
}