Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 13 additions & 11 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,19 @@ repos:
- "//"

# Rust Linter
- repo: local
- repo: https://github.com/doublify/pre-commit-rust
rev: v1.0
hooks:
- id: rust-fmt
name: rustfmt
entry: bash -c 'cd qdp && cargo fmt --all'
language: system
types: [rust]
- id: fmt
pass_filenames: false
- id: rust-clippy
name: clippy
entry: cargo clippy --manifest-path qdp/Cargo.toml --all-targets --all-features --fix --allow-dirty --allow-staged -- -D warnings
language: system
types: [rust]
args: ['--manifest-path', 'qdp/Cargo.toml', '--all']
- id: clippy
# clippy needs context of the whole crate to compile correctly
pass_filenames: false
args: [
'--manifest-path', 'qdp/Cargo.toml',
'--all-targets',
'--all-features',
'--',
'-D', 'warnings'
]
2 changes: 0 additions & 2 deletions qdp/qdp-core/src/gpu/cuda_ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@

//! Centralized CUDA Runtime API FFI declarations.

#![cfg(target_os = "linux")]

use std::ffi::c_void;

pub(crate) const CUDA_MEMCPY_HOST_TO_DEVICE: u32 = 1;
Expand Down
20 changes: 20 additions & 0 deletions qdp/qdp-core/src/gpu/pipeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,14 @@ impl PipelineContext {
}

/// Async H2D copy on copy stream
///
/// # Safety
///
/// The caller must ensure that:
/// - `dst` points to valid device memory of at least `len_elements * size_of::<f64>()` bytes
/// - `src` is a valid pinned buffer with at least `len_elements` elements
/// - The memory regions do not overlap in an undefined way
/// - The CUDA stream is valid and properly initialized
pub unsafe fn async_copy_to_device(
&self,
src: &PinnedBuffer,
Expand All @@ -89,13 +97,21 @@ impl PipelineContext {
}

/// Record copy completion event
///
/// Records `event_copy_done` on the copy stream (`stream_copy`) via `cudaEventRecord`.
///
/// # Safety
///
/// The caller must ensure that the CUDA event and stream are valid and properly initialized.
pub unsafe fn record_copy_done(&self) {
// SAFETY: the caller of this `unsafe fn` guarantees that the event and stream handles are valid.
// NOTE(review): any value returned by `cudaEventRecord` is discarded here — confirm whether
// it is a status code that should be checked.
unsafe {
cudaEventRecord(self.event_copy_done, self.stream_copy.stream as *mut c_void);
}
}

/// Make compute stream wait for copy completion
///
/// # Safety
///
/// The caller must ensure that the compute stream and copy event are valid and properly initialized.
pub unsafe fn wait_for_copy(&self) {
crate::profile_scope!("GPU::StreamWait");
unsafe {
Expand All @@ -108,6 +124,10 @@ impl PipelineContext {
}

/// Sync copy stream (safe to reuse host buffer)
///
/// # Safety
///
/// The caller must ensure that the copy stream is valid and properly initialized.
pub unsafe fn sync_copy_stream(&self) {
crate::profile_scope!("Pipeline::SyncCopy");
unsafe {
Expand Down
10 changes: 6 additions & 4 deletions qdp/qdp-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ use std::sync::mpsc::{Receiver, SyncSender, sync_channel};
#[cfg(target_os = "linux")]
use std::thread;

/// Message type for the full-buffer channel: either a `PinnedBuffer` paired with a `usize`,
/// or the `MahoutError` that occurred instead.
// NOTE(review): the `usize` is presumably the number of valid elements in the buffer — confirm
// against the producer side.
#[cfg(target_os = "linux")]
type BufferResult = std::result::Result<(PinnedBuffer, usize), MahoutError>;
/// Sender/receiver pair (as returned by `sync_channel`) that carries [`BufferResult`] values.
#[cfg(target_os = "linux")]
type BufferChannels = (SyncSender<BufferResult>, Receiver<BufferResult>);

use crate::dlpack::DLManagedTensor;
#[cfg(target_os = "linux")]
use crate::gpu::PipelineContext;
Expand Down Expand Up @@ -200,10 +205,7 @@ impl QdpEngine {
let dev_in_b = unsafe { self.device.alloc::<f64>(STAGE_SIZE_ELEMENTS) }
.map_err(|e| MahoutError::MemoryAllocation(format!("{:?}", e)))?;

let (full_buf_tx, full_buf_rx): (
SyncSender<std::result::Result<(PinnedBuffer, usize), MahoutError>>,
Receiver<std::result::Result<(PinnedBuffer, usize), MahoutError>>,
) = sync_channel(2);
let (full_buf_tx, full_buf_rx): BufferChannels = sync_channel(2);
let (empty_buf_tx, empty_buf_rx): (SyncSender<PinnedBuffer>, Receiver<PinnedBuffer>) =
sync_channel(2);

Expand Down
4 changes: 2 additions & 2 deletions qdp/qdp-kernels/tests/amplitude_encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -509,9 +509,9 @@ fn test_amplitude_encode_small_input_large_state() {
assert!((state_h[1].x - 0.8).abs() < EPSILON);

// Rest should be zero
for i in 2..state_len {
for (i, item) in state_h.iter().enumerate().take(state_len).skip(2) {
assert!(
state_h[i].x.abs() < EPSILON && state_h[i].y.abs() < EPSILON,
item.x.abs() < EPSILON && item.y.abs() < EPSILON,
"Element {} should be zero-padded",
i
);
Expand Down
Loading