1 file changed: +6 -3 lines changed
Columns: original file line number | diff line number | diff line change
@@ -5975,7 +5975,7 @@ static void ggml_cuda_op_mul_mat(
59755975
59765976 // if multiple devices are used they need to wait for the main device
59775977 // here an event is recorded that signals that the main device has finished calculating the input data
5978- if (split) {
5978+ if (split && g_device_count > 1 ) {
59795979 CUDA_CHECK (cudaSetDevice (g_main_device));
59805980 CUDA_CHECK (cudaEventRecord (src0_extra->events [g_main_device][0 ], g_cudaStreams[g_main_device][0 ]));
59815981 }
@@ -6114,10 +6114,13 @@ static void ggml_cuda_op_mul_mat(
61146114 }
61156115
61166116 // main device waits for all other devices to be finished
6117- if (split) {
6117+ if (split && g_device_count > 1 ) {
6118+ int64_t is_max = (ne11 + MUL_MAT_SRC1_COL_STRIDE - 1 ) / MUL_MAT_SRC1_COL_STRIDE;
6119+ is_max = is_max <= MAX_STREAMS ? is_max : MAX_STREAMS;
6120+
61186121 CUDA_CHECK (cudaSetDevice (g_main_device));
61196122 for (int64_t id = 0 ; id < g_device_count; ++id) {
6120- for (int64_t is = 0 ; is < MAX_STREAMS ; ++is) {
6123+ for (int64_t is = 0 ; is < is_max ; ++is) {
61216124 CUDA_CHECK (cudaStreamWaitEvent (g_cudaStreams[g_main_device][0 ], src0_extra->events [id][is]));
61226125 }
61236126 }
You can’t perform that action at this time.
0 commit comments