Skip to content

Changing optimization level of host compiler causes NVRTC_ERROR_COMPILATION #195

@ShatrovOA

Description

@ShatrovOA

Hello!

I am using the NVIDIA HPC SDK compiler, version 24.5.
When I build the following sample program with the flags "-O2" or "-O3 -fast", VkFFT generates invalid device code, which then fails to compile.

The error is:
VkFFT.cu(453): error: identifier "f" is undefined

loc_1.x = fma(temp_2.x, 8.85456025653209896e-01f, loc_1.x);
loc_1.y = fma(temp_7.y, 8.85456025653209896e-01f, loc_1.y);
loc_12.x = fma(temp_2.y, 4.64723172043768546e-01f, loc_12.x);
loc_12.y = fma(temp_7.x, 4.64723172043768546e-01f, loc_12.y);
loc_2.x = fma(temp_2.x, -9.70941817426052027e-01f, loc_2.x);
loc_2.y = fma(temp_7.y, -9.70941817426052027e-01f, loc_2.y);
loc_11.x = fma(temp_2.y, f, loc_11.x); // HERE and also a lot of times later
loc_11.y = fma(temp_7.x, f, loc_11.y);

Passing the flag "-O1" instead seems to work just fine.
I am using the latest VkFFT from the develop branch.

Here is the sample:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define VKFFT_BACKEND 1
#include "vkFFT.h"

/*
 * Evaluates a VkFFT call exactly once. If the result is not VKFFT_SUCCESS,
 * prints the VkFFT error string with the call site to stderr and terminates
 * the process, using the VkFFT result code as the exit status.
 *
 * The argument is parenthesized and the temporary carries a macro-private
 * name so that an expression mentioning a caller variable called `ierr`
 * is not shadowed by the macro's own local.
 */
#define VKFFT_CALL(call)                                               \
  do {                                                                 \
    VkFFTResult vkfft_call_status_ = (call);                           \
    if (vkfft_call_status_ != VKFFT_SUCCESS) {                         \
      fprintf(stderr, "Fatal error in vkFFT: %s at %s:%d\n",           \
          getVkFFTErrorString(vkfft_call_status_), __FILE__, __LINE__); \
      exit(vkfft_call_status_);                                        \
    }                                                                  \
  } while (0)

/*
 * Evaluates a CUDA runtime-API call (one returning cudaError_t) exactly once.
 * If the result is not cudaSuccess, prints the runtime error string with the
 * call site to stderr and terminates the process, using the error code as the
 * exit status.
 *
 * The argument is parenthesized and the temporary carries a macro-private
 * name so that an expression mentioning a caller variable called `ierr`
 * is not shadowed by the macro's own local.
 */
#define CUDA_CALL(call)                                               \
  do {                                                                \
    cudaError_t cuda_call_status_ = (call);                           \
    if (cuda_call_status_ != cudaSuccess) {                           \
      fprintf(stderr, "Fatal error in CUDA: %s at %s:%d\n",           \
          cudaGetErrorString(cuda_call_status_), __FILE__, __LINE__); \
      exit(cuda_call_status_);                                        \
    }                                                                 \
  } while (0)


/* Zero-initialized allocation that aborts the process instead of returning NULL. */
static void *vkfft_sample_alloc(size_t count, size_t size) {
  void *ptr = calloc(count, size);
  if (ptr == NULL) {
    fprintf(stderr, "Fatal error: out of memory at %s:%d\n", __FILE__, __LINE__);
    exit(EXIT_FAILURE);
  }
  return ptr;
}

/*
 * Creates a single-precision VkFFT plan and stores it in *app_handle.
 *
 *   rank       number of FFT dimensions; dims[0..rank-1] are copied into the
 *              configuration (the caller must keep rank within the capacity
 *              of config.size - not checked here)
 *   dims       per-dimension sizes
 *   how_many   number of batches
 *   r2c, c2r   if either is non-zero an R2C-type plan is requested; r2c takes
 *              precedence and builds a forward-only plan, otherwise c2r builds
 *              an inverse-only plan. If both are zero, no R2C flags or buffer
 *              strides are set.
 *   stream     CUDA stream handed to VkFFT
 *   app_handle receives the heap-allocated VkFFTApplication
 *
 * Any allocation, CUDA or VkFFT failure prints a message and exits the
 * process. The VkFFTApplication, the one-element stream array and the device
 * handle are heap-allocated and are not freed by this function.
 */
void vkfft_create(const int8_t rank, const int *dims, const int how_many,
                  const int8_t r2c, const int8_t c2r, cudaStream_t stream, VkFFTApplication **app_handle) {
  VkFFTConfiguration config = {};
  VkFFTApplication* app = (VkFFTApplication*)vkfft_sample_alloc(1, sizeof(VkFFTApplication));

  config.FFTdim = rank;
  int dim;
  for (dim = 0; dim < rank; dim++)
  {
    config.size[dim] = dims[dim];
  }
  config.doublePrecision = 0;
  config.numberBatches = how_many;

  int device_num;
  CUDA_CALL( cudaGetDevice(&device_num) );

  // NOTE(review): config.device is only a pointer. The device handle is kept
  // on the heap, like the stream array below, on the assumption that VkFFT may
  // keep the pointer after initializeVkFFT returns - confirm against vkFFT.h.
  CUdevice *device = (CUdevice*)vkfft_sample_alloc(1, sizeof(CUdevice));

  // cuDeviceGet is a driver-API call returning CUresult, so it is checked
  // against CUDA_SUCCESS and reported with cuGetErrorString rather than going
  // through the runtime-API CUDA_CALL macro.
  CUresult device_status = cuDeviceGet(device, device_num);
  if (device_status != CUDA_SUCCESS) {
    const char *device_msg = NULL;
    if (cuGetErrorString(device_status, &device_msg) != CUDA_SUCCESS || device_msg == NULL) {
      device_msg = "unknown error";
    }
    fprintf(stderr, "Fatal error in CUDA: %s at %s:%d\n",
        device_msg, __FILE__, __LINE__);
    exit(device_status);
  }
  config.device = device;

  config.stream = (cudaStream_t*)vkfft_sample_alloc(1, sizeof(cudaStream_t));
  config.stream[0] = stream;
  config.num_streams = 1;

  config.isInputFormatted = 1;
  config.isOutputFormatted = 1;
  config.performDCT = 0;
  config.performDST = 0;

  if ( r2c || c2r ) {
    config.performR2C = 1;
    if ( r2c ) {
      // Real side has dims[0] elements per row, complex side dims[0]/2 + 1.
      config.inputBufferStride[0] = dims[0];
      config.outputBufferStride[0] = (dims[0] / 2) + 1;
      config.makeForwardPlanOnly = 1;
    } else {
      config.inputBufferStride[0] = (dims[0] / 2) + 1;
      config.outputBufferStride[0] = dims[0];
      config.makeInversePlanOnly = 1;
    }
    // Each higher stride is the previous stride times that dimension's size.
    for ( dim = 1; dim < rank; dim++ ) {
      config.inputBufferStride[dim] = config.inputBufferStride[dim - 1] * dims[dim];
      config.outputBufferStride[dim] = config.outputBufferStride[dim - 1] * dims[dim];
    }
  }

  VKFFT_CALL( initializeVkFFT(app, config) );
  *app_handle = app;
}

/*
 * Reproduction driver: selects device 0, creates a stream, and builds a
 * forward (R2C) and a backward (C2R) 2-D plan for a 513 x 711 transform with
 * 33 batches. Any failure terminates the process through CUDA_CALL/VKFFT_CALL
 * inside the helpers. On success the plans and the stream are released before
 * returning.
 */
int main() {

  CUDA_CALL( cudaSetDevice(0) );
  // Presumably here to force CUDA context creation before the driver-API
  // call made in vkfft_create - TODO confirm.
  CUDA_CALL( cudaFree(0) );

  cudaStream_t stream;
  CUDA_CALL( cudaStreamCreate(&stream) );

  int dims[2] = {513, 711};

  VkFFTApplication *plan_forward = NULL;
  VkFFTApplication *plan_backward = NULL;

  vkfft_create(2, dims,  33, 1, 0, stream, &plan_forward);
  printf("Forward created\n");

  vkfft_create(2, dims,  33, 0, 1, stream, &plan_backward);
  printf("Backward created\n");

  // Release the plans before the stream they were configured with.
  // deleteVkFFT releases what VkFFT allocated internally; the application
  // structs themselves were calloc'ed by vkfft_create and are freed here.
  deleteVkFFT(plan_backward);
  free(plan_backward);
  deleteVkFFT(plan_forward);
  free(plan_forward);

  CUDA_CALL( cudaStreamDestroy(stream) );
  return 0;
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions