pub struct MultiHeadAttnFwdRequest<T: CudnnSupported> {
pub q: GpuRef<T>,
pub k: GpuRef<T>,
pub v: GpuRef<T>,
pub o: GpuRef<T>,
pub stats: Option<GpuRef<T>>,
pub bias: Option<GpuRef<T>>,
pub layout: TensorLayout,
pub params: AttentionParams,
pub reply: Sender<Result<(), GpuError>>,
pub _ty: PhantomData<T>,
}
Expand description
Multi-head attention (MHA) forward request.
Fields§
§q: GpuRef<T>
§k: GpuRef<T>
§v: GpuRef<T>
§o: GpuRef<T>
§stats: Option<GpuRef<T>>
Optional saved softmax statistics for the backward pass.
§bias: Option<GpuRef<T>>
Optional bias added to attention scores.
§layout: TensorLayout
§params: AttentionParams
§reply: Sender<Result<(), GpuError>>
§_ty: PhantomData<T>
Implementations§
Source§impl<T: CudnnSupported> MultiHeadAttnFwdRequest<T>
impl<T: CudnnSupported> MultiHeadAttnFwdRequest<T>
pub fn graph_spec(&self) -> OperationGraphSpec
Trait Implementations§
Source§impl<T: CudnnSupported> CudnnDispatch for MultiHeadAttnFwdRequest<T>
impl<T: CudnnSupported> CudnnDispatch for MultiHeadAttnFwdRequest<T>
Auto Trait Implementations§
impl<T> Freeze for MultiHeadAttnFwdRequest<T>
impl<T> !RefUnwindSafe for MultiHeadAttnFwdRequest<T>
impl<T> Send for MultiHeadAttnFwdRequest<T>
impl<T> Sync for MultiHeadAttnFwdRequest<T>
impl<T> Unpin for MultiHeadAttnFwdRequest<T> where
T: Unpin,
impl<T> UnsafeUnpin for MultiHeadAttnFwdRequest<T>
impl<T> !UnwindSafe for MultiHeadAttnFwdRequest<T>
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more