-
-
Couldn't load subscription status.
- Fork 118
Open
Description
Hello!
I am using Nvidia HPC SDK Compiler 24.5
When I try to build the following sample program with the flag "-O2" or "-O3 -fast", VkFFT produces invalid device code, which fails to compile.
The error is
VkFFT.cu(453): error: identifier "f" is undefined
loc_1.x = fma(temp_2.x, 8.85456025653209896e-01f, loc_1.x);
loc_1.y = fma(temp_7.y, 8.85456025653209896e-01f, loc_1.y);
loc_12.x = fma(temp_2.y, 4.64723172043768546e-01f, loc_12.x);
loc_12.y = fma(temp_7.x, 4.64723172043768546e-01f, loc_12.y);
loc_2.x = fma(temp_2.x, -9.70941817426052027e-01f, loc_2.x);
loc_2.y = fma(temp_7.y, -9.70941817426052027e-01f, loc_2.y);
loc_11.x = fma(temp_2.y, f, loc_11.x); // HERE and also a lot of times later
loc_11.y = fma(temp_7.x, f, loc_11.y);
Passing the flag "-O1" seems to work just fine.
VkFFT is the latest version from the develop branch.
Here is the sample:
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#define VKFFT_BACKEND 1
#include "vkFFT.h"
/*
 * Evaluates a VkFFT expression exactly once. If the result is not
 * VKFFT_SUCCESS, prints the VkFFT error string together with the source
 * location to stderr and terminates the process, using the VkFFT result
 * code as the exit status.
 *
 * The argument is parenthesized and the temporary carries a macro-private
 * name so that an expression mentioning a caller variable called `ierr`
 * is not captured by the macro's own local.
 */
#define VKFFT_CALL(call)                                                      \
    do {                                                                      \
        VkFFTResult vkfft_call_status_ = (call);                              \
        if (vkfft_call_status_ != VKFFT_SUCCESS) {                            \
            fprintf(stderr, "Fatal error in vkFFT: %s at %s:%d\n",            \
                    getVkFFTErrorString(vkfft_call_status_),                  \
                    __FILE__, __LINE__);                                      \
            exit(vkfft_call_status_);                                         \
        }                                                                     \
    } while (0)
/*
 * Evaluates a CUDA runtime API expression (one returning cudaError_t)
 * exactly once. If the result is not cudaSuccess, prints the runtime error
 * string together with the source location to stderr and terminates the
 * process, using the error code as the exit status.
 *
 * The argument is parenthesized and the temporary carries a macro-private
 * name so that an expression mentioning a caller variable called `ierr`
 * is not captured by the macro's own local.
 */
#define CUDA_CALL(call)                                                       \
    do {                                                                      \
        cudaError_t cuda_call_status_ = (call);                               \
        if (cuda_call_status_ != cudaSuccess) {                               \
            fprintf(stderr, "Fatal error in CUDA: %s at %s:%d\n",             \
                    cudaGetErrorString(cuda_call_status_),                    \
                    __FILE__, __LINE__);                                      \
            exit(cuda_call_status_);                                          \
        }                                                                     \
    } while (0)
void vkfft_create(const int8_t rank, const int *dims, const int how_many,
const int8_t r2c, const int8_t c2r, cudaStream_t stream, VkFFTApplication **app_handle) {
VkFFTConfiguration config = {};
VkFFTApplication* app = (VkFFTApplication*)calloc(1, sizeof(VkFFTApplication));
config.FFTdim = rank;
int dim;
for (dim = 0; dim < rank; dim++)
{
config.size[dim] = dims[dim];
}
config.doublePrecision = 0;
config.numberBatches = how_many;
CUdevice device;
int device_num;
CUDA_CALL( cudaGetDevice(&device_num) );
CUDA_CALL( cuDeviceGet(&device, device_num) );
config.device = &device;
config.stream = (cudaStream_t*)malloc(sizeof(cudaStream_t));
config.stream[0] = stream;
config.num_streams = 1;
config.isInputFormatted = 1;
config.isOutputFormatted = 1;
config.performDCT = 0;
config.performDST = 0;
if ( r2c || c2r ) {
config.performR2C = 1;
if ( r2c ) {
config.inputBufferStride[0] = dims[0];
config.outputBufferStride[0] = (dims[0] / 2) + 1;
config.makeForwardPlanOnly = 1;
} else {
config.inputBufferStride[0] = (dims[0] / 2) + 1;
config.outputBufferStride[0] = dims[0];
config.makeInversePlanOnly = 1;
}
for ( dim = 1; dim < rank; dim++ ) {
config.inputBufferStride[dim] = config.inputBufferStride[dim - 1] * dims[dim];
config.outputBufferStride[dim] = config.outputBufferStride[dim - 1] * dims[dim];
}
}
VKFFT_CALL( initializeVkFFT(app, config) );
*app_handle = app;
}
int main() {
CUDA_CALL( cudaSetDevice(0));
CUDA_CALL( cudaFree(0) );
cudaStream_t stream;
CUDA_CALL( cudaStreamCreate(&stream) );
int dims[2] = {513, 711};
VkFFTApplication *plan_forward;
VkFFTApplication *plan_backward;
vkfft_create(2, dims, 33, 1, 0, stream, &plan_forward);
printf("Forward created\n");
vkfft_create(2, dims, 33, 0, 1, stream, &plan_backward);
printf("Backward created\n");
}Metadata
Metadata
Assignees
Labels
No labels