Skip to content

Commit c23d82c

Browse files
authored
Merge pull request #3136 from alibaba/feature/bugfix
OpenCL: Bugfix: Fix memory mode from the backend config not taking effect when creating a backend
2 parents bbf1a93 + 62c2ec4 commit c23d82c

File tree

4 files changed

+33
-22
lines changed

4 files changed

+33
-22
lines changed

source/backend/opencl/core/OpenCLBackend.cpp

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -192,8 +192,13 @@ std::pair<const void*, size_t> CLRuntime::onGetCache() {
192192
}
193193

194194
Backend* CLRuntime::onCreate(const BackendConfig* config, Backend* origin) const {
195-
// FIXME: Use config info
196-
return new OpenCLBackend(mImagePool, mBufferPool, this);
195+
auto precision = mPrecision;
196+
auto memory = mMemory;
197+
if (nullptr != config) {
198+
precision = config->precision;
199+
memory = config->memory;
200+
}
201+
return new OpenCLBackend(precision, memory, mImagePool, mBufferPool, this);
197202
}
198203

199204
void CLRuntime::onGabageCollect(int level) {
@@ -217,13 +222,14 @@ std::map<std::pair<OpType, GpuMemObject>, OpenCLBackend::Creator*>* gCreator() {
217222
return creators;
218223
};
219224

220-
OpenCLBackend::OpenCLBackend(std::shared_ptr<ImagePool>imgPool, std::shared_ptr<BufferPool> bufPool, const CLRuntime *runtime)
225+
OpenCLBackend::OpenCLBackend(BackendConfig::PrecisionMode precision, BackendConfig::MemoryMode memory, std::shared_ptr<ImagePool>imgPool, std::shared_ptr<BufferPool> bufPool, const CLRuntime *runtime)
221226
: Backend(MNN_FORWARD_OPENCL) {
222227

223228
mCLRuntime = runtime;
224229
mOpenCLRuntime = mCLRuntime->mOpenCLRuntime;
225-
mPrecision = mCLRuntime->mPrecision;
226-
mMemory = mCLRuntime->mMemory;
230+
mPrecision = precision;
231+
mMemory = memory;
232+
mOpenCLRuntime->setPrecision(precision);
227233
mStaticImagePool = imgPool;
228234
mStaticBufferPool = bufPool;
229235
if(mOpenCLRuntime.get()){

source/backend/opencl/core/OpenCLBackend.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,7 @@ class CLRuntime : public Runtime {
8080

8181
class OpenCLBackend : public Backend {
8282
public:
83-
OpenCLBackend(std::shared_ptr<ImagePool>imgPool, std::shared_ptr<BufferPool> bufPool, const CLRuntime *runtime);
83+
OpenCLBackend(BackendConfig::PrecisionMode precision, BackendConfig::MemoryMode memory, std::shared_ptr<ImagePool>imgPool, std::shared_ptr<BufferPool> bufPool, const CLRuntime *runtime);
8484
~OpenCLBackend();
8585

8686
OpenCLRuntime *getOpenCLRuntime();

source/backend/opencl/core/runtime/OpenCLRuntime.cpp

Lines changed: 20 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -237,11 +237,6 @@ OpenCLRuntime::OpenCLRuntime(const BackendConfig::PrecisionMode precision, const
237237
mFirstGPUDevicePtr->getInfo(CL_DEVICE_MAX_MEM_ALLOC_SIZE, &mMaxMemAllocSize);
238238
mFirstGPUDevicePtr->getInfo(CL_DEVICE_LOCAL_MEM_SIZE, &mMaxLocalMemSize);
239239
mMaxWorkGroupSize = mFirstGPUDevicePtr->getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
240-
cl_device_fp_config fpConfig;
241-
auto success = mFirstGPUDevicePtr->getInfo(CL_DEVICE_HALF_FP_CONFIG, &fpConfig);
242-
mIsDeviceSupportedFP16 = CL_SUCCESS == success && fpConfig > 0;
243-
bool checkFp16Exetension = getDeviceSupportsExtension(*(mFirstGPUDevicePtr.get()), "cl_khr_fp16");
244-
mIsDeviceSupportedFP16 = (mIsDeviceSupportedFP16 && checkFp16Exetension);
245240

246241
//set gpu mode, tuning level and memory object
247242
setGpuMode(cl_mode);
@@ -253,18 +248,8 @@ OpenCLRuntime::OpenCLRuntime(const BackendConfig::PrecisionMode precision, const
253248
mMemType = IMAGE;
254249
}
255250
}
256-
mPrecisionLevel = 1;
257-
if (mIsDeviceSupportedFP16) {
258-
if (precision == BackendConfig::Precision_Low) {
259-
mPrecisionLevel = 2;
260-
} else if (precision == BackendConfig::Precision_Normal && mMemType == BUFFER) {
261-
mPrecisionLevel = 0;
262-
}
263-
}
251+
setPrecision(precision);
264252

265-
// Is supported fp16 IO storage
266-
mIsSupportedFP16 = (mPrecisionLevel == 2 || mPrecisionLevel == 0);
267-
268253
if(getDeviceSupportsExtension(*(mFirstGPUDevicePtr.get()), "cl_arm_integer_dot_product_int8")){
269254
mSupportDotInt8 = true;
270255
}
@@ -515,6 +500,25 @@ uint64_t OpenCLRuntime::maxAllocSize() const {
515500
return mMaxMemAllocSize;
516501
}
517502

503+
void OpenCLRuntime::setPrecision(const BackendConfig::PrecisionMode precision){
504+
cl_device_fp_config fpConfig;
505+
auto success = mFirstGPUDevicePtr->getInfo(CL_DEVICE_HALF_FP_CONFIG, &fpConfig);
506+
mIsDeviceSupportedFP16 = CL_SUCCESS == success && fpConfig > 0;
507+
bool checkFp16Exetension = getDeviceSupportsExtension(*(mFirstGPUDevicePtr.get()), "cl_khr_fp16");
508+
mIsDeviceSupportedFP16 = (mIsDeviceSupportedFP16 && checkFp16Exetension);
509+
mPrecisionLevel = 1;
510+
if (mIsDeviceSupportedFP16) {
511+
if (precision == BackendConfig::Precision_Low) {
512+
mPrecisionLevel = 2;
513+
} else if (precision == BackendConfig::Precision_Normal && mMemType == BUFFER) {
514+
mPrecisionLevel = 0;
515+
}
516+
}
517+
518+
// Is supported fp16 IO storage
519+
mIsSupportedFP16 = (mPrecisionLevel == 2 || mPrecisionLevel == 0);
520+
}
521+
518522
bool OpenCLRuntime::loadProgram(const std::string &programName, cl::Program *program) {
519523
std::lock_guard<std::mutex> lck(gCLMutex);
520524
auto it_source = OpenCLProgramMap.find(programName);

source/backend/opencl/core/runtime/OpenCLRuntime.hpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -91,6 +91,7 @@ class OpenCLRuntime {
9191
uint64_t GetKernelWaveSize(std::shared_ptr<KernelWrap> kernel);
9292
std::vector<uint32_t> getMaxWorkItemSizes();
9393
uint64_t getMaxLocalMem() const;
94+
void setPrecision(const BackendConfig::PrecisionMode precision);
9495
uint32_t getUseRecordableQueueSize(){
9596
return mUseRecordableQueueSize;
9697
}

0 commit comments

Comments
 (0)