Merge pull request #3136 from alibaba/feature/bugfix

jxt1234 · web-flow · commit c23d82cee71c · 2024-12-23T15:59:05.000+08:00
OpenCL:Bugfix: Fix memory mode not taking effect when creating a backend
diff --git a/source/backend/opencl/core/OpenCLBackend.cpp b/source/backend/opencl/core/OpenCLBackend.cpp
@@ -192,8 +192,13 @@ std::pair<const void*, size_t> CLRuntime::onGetCache() {
 }
 
 Backend* CLRuntime::onCreate(const BackendConfig* config, Backend* origin) const {
-    // FIXME: Use config info
-    return new OpenCLBackend(mImagePool, mBufferPool, this);
+    auto precision = mPrecision;
+    auto memory = mMemory;
+    if (nullptr != config) {
+        precision = config->precision;
+        memory = config->memory;
+    }
+    return new OpenCLBackend(precision, memory, mImagePool, mBufferPool, this);
 }
 
 void CLRuntime::onGabageCollect(int level) {
@@ -217,13 +222,14 @@ std::map<std::pair<OpType, GpuMemObject>, OpenCLBackend::Creator*>* gCreator() {
     return creators;
 };
 
-OpenCLBackend::OpenCLBackend(std::shared_ptr<ImagePool>imgPool, std::shared_ptr<BufferPool> bufPool, const CLRuntime *runtime)
+OpenCLBackend::OpenCLBackend(BackendConfig::PrecisionMode precision, BackendConfig::MemoryMode memory, std::shared_ptr<ImagePool>imgPool, std::shared_ptr<BufferPool> bufPool, const CLRuntime *runtime)
     : Backend(MNN_FORWARD_OPENCL) {
 
     mCLRuntime = runtime;
     mOpenCLRuntime = mCLRuntime->mOpenCLRuntime;
-    mPrecision = mCLRuntime->mPrecision;
-    mMemory = mCLRuntime->mMemory;
+    mPrecision = precision;
+    mMemory = memory;
+    mOpenCLRuntime->setPrecision(precision);
     mStaticImagePool = imgPool;
     mStaticBufferPool = bufPool;
     if(mOpenCLRuntime.get()){
diff --git a/source/backend/opencl/core/OpenCLBackend.hpp b/source/backend/opencl/core/OpenCLBackend.hpp
@@ -80,7 +80,7 @@ class CLRuntime : public Runtime {
 
 class OpenCLBackend : public Backend {
 public:
-    OpenCLBackend(std::shared_ptr<ImagePool>imgPool, std::shared_ptr<BufferPool> bufPool, const CLRuntime *runtime);
+    OpenCLBackend(BackendConfig::PrecisionMode precision, BackendConfig::MemoryMode memory, std::shared_ptr<ImagePool>imgPool, std::shared_ptr<BufferPool> bufPool, const CLRuntime *runtime);
     ~OpenCLBackend();
 
     OpenCLRuntime *getOpenCLRuntime();
diff --git a/source/backend/opencl/core/runtime/OpenCLRuntime.cpp b/source/backend/opencl/core/runtime/OpenCLRuntime.cpp
@@ -237,11 +237,6 @@ OpenCLRuntime::OpenCLRuntime(const BackendConfig::PrecisionMode precision, const
             mFirstGPUDevicePtr->getInfo(CL_DEVICE_MAX_MEM_ALLOC_SIZE, &mMaxMemAllocSize);
             mFirstGPUDevicePtr->getInfo(CL_DEVICE_LOCAL_MEM_SIZE, &mMaxLocalMemSize);
             mMaxWorkGroupSize = mFirstGPUDevicePtr->getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
-            cl_device_fp_config fpConfig;
-            auto success = mFirstGPUDevicePtr->getInfo(CL_DEVICE_HALF_FP_CONFIG, &fpConfig);
-            mIsDeviceSupportedFP16     = CL_SUCCESS == success && fpConfig > 0;
-            bool checkFp16Exetension = getDeviceSupportsExtension(*(mFirstGPUDevicePtr.get()), "cl_khr_fp16");
-            mIsDeviceSupportedFP16 = (mIsDeviceSupportedFP16 && checkFp16Exetension);
             
             //set gpu mode, tuning level and memory object
             setGpuMode(cl_mode);
@@ -253,18 +248,8 @@ OpenCLRuntime::OpenCLRuntime(const BackendConfig::PrecisionMode precision, const
                     mMemType = IMAGE;
                 }
             }
-            mPrecisionLevel = 1;
-            if (mIsDeviceSupportedFP16) {
-                if (precision == BackendConfig::Precision_Low) {
-                    mPrecisionLevel = 2;
-                } else if (precision == BackendConfig::Precision_Normal && mMemType == BUFFER) {
-                    mPrecisionLevel = 0;
-                }
-            }
+            setPrecision(precision);
             
-            // Is supported fp16 IO storage
-            mIsSupportedFP16 = (mPrecisionLevel == 2 || mPrecisionLevel == 0);
-
             if(getDeviceSupportsExtension(*(mFirstGPUDevicePtr.get()), "cl_arm_integer_dot_product_int8")){
                 mSupportDotInt8 = true;
             }
@@ -515,6 +500,25 @@ uint64_t OpenCLRuntime::maxAllocSize() const {
     return mMaxMemAllocSize;
 }
 
+void OpenCLRuntime::setPrecision(const BackendConfig::PrecisionMode precision){
+    cl_device_fp_config fpConfig;
+    auto success = mFirstGPUDevicePtr->getInfo(CL_DEVICE_HALF_FP_CONFIG, &fpConfig);
+    mIsDeviceSupportedFP16     = CL_SUCCESS == success && fpConfig > 0;
+    bool checkFp16Exetension = getDeviceSupportsExtension(*(mFirstGPUDevicePtr.get()), "cl_khr_fp16");
+    mIsDeviceSupportedFP16 = (mIsDeviceSupportedFP16 && checkFp16Exetension);
+    mPrecisionLevel = 1;
+    if (mIsDeviceSupportedFP16) {
+        if (precision == BackendConfig::Precision_Low) {
+            mPrecisionLevel = 2;
+        } else if (precision == BackendConfig::Precision_Normal && mMemType == BUFFER) {
+            mPrecisionLevel = 0;
+        }
+    }
+    
+    // Is supported fp16 IO storage
+    mIsSupportedFP16 = (mPrecisionLevel == 2 || mPrecisionLevel == 0);
+}
+
 bool OpenCLRuntime::loadProgram(const std::string &programName, cl::Program *program) {
     std::lock_guard<std::mutex> lck(gCLMutex);
     auto it_source = OpenCLProgramMap.find(programName);
diff --git a/source/backend/opencl/core/runtime/OpenCLRuntime.hpp b/source/backend/opencl/core/runtime/OpenCLRuntime.hpp
@@ -91,6 +91,7 @@ class OpenCLRuntime {
     uint64_t GetKernelWaveSize(std::shared_ptr<KernelWrap> kernel);
     std::vector<uint32_t> getMaxWorkItemSizes();
     uint64_t getMaxLocalMem() const;
+    void setPrecision(const BackendConfig::PrecisionMode precision);
     uint32_t getUseRecordableQueueSize(){
         return mUseRecordableQueueSize;
     }

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -91,6 +91,7 @@ class OpenCLRuntime {` |
| `91` | `91` | `uint64_t GetKernelWaveSize(std::shared_ptr<KernelWrap> kernel);` |
| `92` | `92` | `std::vector<uint32_t> getMaxWorkItemSizes();` |
| `93` | `93` | `uint64_t getMaxLocalMem() const;` |
|  | `94` | `+ void setPrecision(const BackendConfig::PrecisionMode precision);` |
| `94` | `95` | `uint32_t getUseRecordableQueueSize(){` |
| `95` | `96` | `return mUseRecordableQueueSize;` |
| `96` | `97` | `}` |