@@ -237,11 +237,6 @@ OpenCLRuntime::OpenCLRuntime(const BackendConfig::PrecisionMode precision, const
237237 mFirstGPUDevicePtr ->getInfo (CL_DEVICE_MAX_MEM_ALLOC_SIZE, &mMaxMemAllocSize );
238238 mFirstGPUDevicePtr ->getInfo (CL_DEVICE_LOCAL_MEM_SIZE, &mMaxLocalMemSize );
239239 mMaxWorkGroupSize = mFirstGPUDevicePtr ->getInfo <CL_DEVICE_MAX_WORK_GROUP_SIZE>();
240- cl_device_fp_config fpConfig;
241- auto success = mFirstGPUDevicePtr ->getInfo (CL_DEVICE_HALF_FP_CONFIG, &fpConfig);
242- mIsDeviceSupportedFP16 = CL_SUCCESS == success && fpConfig > 0 ;
243- bool checkFp16Exetension = getDeviceSupportsExtension (*(mFirstGPUDevicePtr .get ()), " cl_khr_fp16" );
244- mIsDeviceSupportedFP16 = (mIsDeviceSupportedFP16 && checkFp16Exetension);
245240
246241 // set gpu mode, tuning level and memory object
247242 setGpuMode (cl_mode);
@@ -253,18 +248,8 @@ OpenCLRuntime::OpenCLRuntime(const BackendConfig::PrecisionMode precision, const
253248 mMemType = IMAGE;
254249 }
255250 }
256- mPrecisionLevel = 1 ;
257- if (mIsDeviceSupportedFP16 ) {
258- if (precision == BackendConfig::Precision_Low) {
259- mPrecisionLevel = 2 ;
260- } else if (precision == BackendConfig::Precision_Normal && mMemType == BUFFER) {
261- mPrecisionLevel = 0 ;
262- }
263- }
251+ setPrecision (precision);
264252
265- // Is supported fp16 IO storage
266- mIsSupportedFP16 = (mPrecisionLevel == 2 || mPrecisionLevel == 0 );
267-
268253 if (getDeviceSupportsExtension (*(mFirstGPUDevicePtr .get ()), " cl_arm_integer_dot_product_int8" )){
269254 mSupportDotInt8 = true ;
270255 }
@@ -515,6 +500,25 @@ uint64_t OpenCLRuntime::maxAllocSize() const {
515500 return mMaxMemAllocSize ;
516501}
517502
503+ void OpenCLRuntime::setPrecision (const BackendConfig::PrecisionMode precision){
504+ cl_device_fp_config fpConfig;
505+ auto success = mFirstGPUDevicePtr ->getInfo (CL_DEVICE_HALF_FP_CONFIG, &fpConfig);
506+ mIsDeviceSupportedFP16 = CL_SUCCESS == success && fpConfig > 0 ;
507+ bool checkFp16Exetension = getDeviceSupportsExtension (*(mFirstGPUDevicePtr .get ()), " cl_khr_fp16" );
508+ mIsDeviceSupportedFP16 = (mIsDeviceSupportedFP16 && checkFp16Exetension);
509+ mPrecisionLevel = 1 ;
510+ if (mIsDeviceSupportedFP16 ) {
511+ if (precision == BackendConfig::Precision_Low) {
512+ mPrecisionLevel = 2 ;
513+ } else if (precision == BackendConfig::Precision_Normal && mMemType == BUFFER) {
514+ mPrecisionLevel = 0 ;
515+ }
516+ }
517+
518+ // Is supported fp16 IO storage
519+ mIsSupportedFP16 = (mPrecisionLevel == 2 || mPrecisionLevel == 0 );
520+ }
521+
518522bool OpenCLRuntime::loadProgram (const std::string &programName, cl::Program *program) {
519523 std::lock_guard<std::mutex> lck (gCLMutex );
520524 auto it_source = OpenCLProgramMap.find (programName);
0 commit comments