-
Notifications
You must be signed in to change notification settings - Fork 14.1k
Closed
Labels
Description
Git commit
Operating systems
Linux
GGML backends
CUDA
Problem description & steps to reproduce
My daily compilation of llama.cpp from the newest source failed on convert.cu. I can confirm that reverting to 18f3b5f fixes it.
First Bad Commit
Compile command
cmake -B build -DGGML_CUDA=ON -DGGML_CUDA_FORCE_CUBLAS=OFF -DGGML_CUDA_FA_ALL_QUANTS=ON -DGGML_CUDA_F16=ON -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
cmake --build build --config Release -j 8
Relevant log output
[ 9%] Building CUDA object ggml/src/ggml-cuda/CMakeFiles/ggml-cuda.dir/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu.o
/devel/tools/llama.cpp/ggml/src/ggml-cuda/convert.cu(34): error: more than one operator "=" matches these operands:
function "__nv_bfloat16::operator=(float)" (declared at line 305 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(short)" (declared at line 530 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned short)" (declared at line 534 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(int)" (declared at line 538 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned int)" (declared at line 542 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(long long)" (declared at line 546 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned long long)" (declared at line 550 of /usr/include/cuda_bf16.hpp)
operand types are: nv_bfloat16 = __half
y[iy0 + 0] = v.x;
^
detected during:
instantiation of "void dequantize_block<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q4_0, dst_t=nv_bfloat16]" at line 474
instantiation of "void dequantize_block_cuda<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, cudaStream_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q4_0, dst_t=nv_bfloat16]" at line 764
/devel/tools/llama.cpp/ggml/src/ggml-cuda/convert.cu(35): error: more than one operator "=" matches these operands:
function "__nv_bfloat16::operator=(float)" (declared at line 305 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(short)" (declared at line 530 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned short)" (declared at line 534 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(int)" (declared at line 538 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned int)" (declared at line 542 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(long long)" (declared at line 546 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned long long)" (declared at line 550 of /usr/include/cuda_bf16.hpp)
operand types are: nv_bfloat16 = __half
y[iy0 + y_offset] = v.y;
^
detected during:
instantiation of "void dequantize_block<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q4_0, dst_t=nv_bfloat16]" at line 474
instantiation of "void dequantize_block_cuda<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, cudaStream_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q4_0, dst_t=nv_bfloat16]" at line 764
/devel/tools/llama.cpp/ggml/src/ggml-cuda/convert.cu(34): error: more than one operator "=" matches these operands:
function "__nv_bfloat16::operator=(float)" (declared at line 305 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(short)" (declared at line 530 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned short)" (declared at line 534 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(int)" (declared at line 538 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned int)" (declared at line 542 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(long long)" (declared at line 546 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned long long)" (declared at line 550 of /usr/include/cuda_bf16.hpp)
operand types are: nv_bfloat16 = __half
y[iy0 + 0] = v.x;
^
detected during:
instantiation of "void dequantize_block<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q4_1, dst_t=nv_bfloat16]" at line 474
instantiation of "void dequantize_block_cuda<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, cudaStream_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q4_1, dst_t=nv_bfloat16]" at line 766
/devel/tools/llama.cpp/ggml/src/ggml-cuda/convert.cu(35): error: more than one operator "=" matches these operands:
function "__nv_bfloat16::operator=(float)" (declared at line 305 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(short)" (declared at line 530 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned short)" (declared at line 534 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(int)" (declared at line 538 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned int)" (declared at line 542 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(long long)" (declared at line 546 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned long long)" (declared at line 550 of /usr/include/cuda_bf16.hpp)
operand types are: nv_bfloat16 = __half
y[iy0 + y_offset] = v.y;
^
detected during:
instantiation of "void dequantize_block<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q4_1, dst_t=nv_bfloat16]" at line 474
instantiation of "void dequantize_block_cuda<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, cudaStream_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q4_1, dst_t=nv_bfloat16]" at line 766
/devel/tools/llama.cpp/ggml/src/ggml-cuda/convert.cu(34): error: more than one operator "=" matches these operands:
function "__nv_bfloat16::operator=(float)" (declared at line 305 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(short)" (declared at line 530 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned short)" (declared at line 534 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(int)" (declared at line 538 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned int)" (declared at line 542 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(long long)" (declared at line 546 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned long long)" (declared at line 550 of /usr/include/cuda_bf16.hpp)
operand types are: nv_bfloat16 = __half
y[iy0 + 0] = v.x;
^
detected during:
instantiation of "void dequantize_block<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q5_0, dst_t=nv_bfloat16]" at line 474
instantiation of "void dequantize_block_cuda<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, cudaStream_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q5_0, dst_t=nv_bfloat16]" at line 768
/devel/tools/llama.cpp/ggml/src/ggml-cuda/convert.cu(35): error: more than one operator "=" matches these operands:
function "__nv_bfloat16::operator=(float)" (declared at line 305 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(short)" (declared at line 530 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned short)" (declared at line 534 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(int)" (declared at line 538 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned int)" (declared at line 542 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(long long)" (declared at line 546 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned long long)" (declared at line 550 of /usr/include/cuda_bf16.hpp)
operand types are: nv_bfloat16 = __half
y[iy0 + y_offset] = v.y;
^
detected during:
instantiation of "void dequantize_block<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q5_0, dst_t=nv_bfloat16]" at line 474
instantiation of "void dequantize_block_cuda<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, cudaStream_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q5_0, dst_t=nv_bfloat16]" at line 768
/devel/tools/llama.cpp/ggml/src/ggml-cuda/convert.cu(34): error: more than one operator "=" matches these operands:
function "__nv_bfloat16::operator=(float)" (declared at line 305 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(short)" (declared at line 530 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned short)" (declared at line 534 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(int)" (declared at line 538 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned int)" (declared at line 542 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(long long)" (declared at line 546 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned long long)" (declared at line 550 of /usr/include/cuda_bf16.hpp)
operand types are: nv_bfloat16 = __half
y[iy0 + 0] = v.x;
^
detected during:
instantiation of "void dequantize_block<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q5_1, dst_t=nv_bfloat16]" at line 474
instantiation of "void dequantize_block_cuda<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, cudaStream_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q5_1, dst_t=nv_bfloat16]" at line 770
/devel/tools/llama.cpp/ggml/src/ggml-cuda/convert.cu(35): error: more than one operator "=" matches these operands:
function "__nv_bfloat16::operator=(float)" (declared at line 305 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(short)" (declared at line 530 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned short)" (declared at line 534 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(int)" (declared at line 538 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned int)" (declared at line 542 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(long long)" (declared at line 546 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned long long)" (declared at line 550 of /usr/include/cuda_bf16.hpp)
operand types are: nv_bfloat16 = __half
y[iy0 + y_offset] = v.y;
^
detected during:
instantiation of "void dequantize_block<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q5_1, dst_t=nv_bfloat16]" at line 474
instantiation of "void dequantize_block_cuda<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, cudaStream_t) [with qk=32, qr=2, dequantize_kernel=dequantize_q5_1, dst_t=nv_bfloat16]" at line 770
/devel/tools/llama.cpp/ggml/src/ggml-cuda/convert.cu(34): error: more than one operator "=" matches these operands:
function "__nv_bfloat16::operator=(float)" (declared at line 305 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(short)" (declared at line 530 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned short)" (declared at line 534 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(int)" (declared at line 538 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned int)" (declared at line 542 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(long long)" (declared at line 546 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned long long)" (declared at line 550 of /usr/include/cuda_bf16.hpp)
operand types are: nv_bfloat16 = __half
y[iy0 + 0] = v.x;
^
detected during:
instantiation of "void dequantize_block<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t) [with qk=32, qr=1, dequantize_kernel=dequantize_q8_0, dst_t=nv_bfloat16]" at line 474
instantiation of "void dequantize_block_cuda<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, cudaStream_t) [with qk=32, qr=1, dequantize_kernel=dequantize_q8_0, dst_t=nv_bfloat16]" at line 772
/devel/tools/llama.cpp/ggml/src/ggml-cuda/convert.cu(35): error: more than one operator "=" matches these operands:
function "__nv_bfloat16::operator=(float)" (declared at line 305 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(short)" (declared at line 530 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned short)" (declared at line 534 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(int)" (declared at line 538 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned int)" (declared at line 542 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(long long)" (declared at line 546 of /usr/include/cuda_bf16.hpp)
function "__nv_bfloat16::operator=(unsigned long long)" (declared at line 550 of /usr/include/cuda_bf16.hpp)
operand types are: nv_bfloat16 = __half
y[iy0 + y_offset] = v.y;
^
detected during:
instantiation of "void dequantize_block<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t) [with qk=32, qr=1, dequantize_kernel=dequantize_q8_0, dst_t=nv_bfloat16]" at line 474
instantiation of "void dequantize_block_cuda<qk,qr,dequantize_kernel,dst_t>(const void *, dst_t *, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, cudaStream_t) [with qk=32, qr=1, dequantize_kernel=dequantize_q8_0, dst_t=nv_bfloat16]" at line 772