# Build configuration for the ggml SYCL backend (target: ggml-sycl).
#
# Variables read by this script:
#   GGML_SYCL_TARGET       INTEL, NVIDIA or AMD; anything else is a fatal error.
#   GGML_SYCL_DNN          When true, try to find and link oneDNN.
#   GGML_SYCL_F16          When true, define GGML_SYCL_F16 for ggml-sycl.
#   GGML_SYCL_GRAPH        When true, define GGML_SYCL_GRAPH for ggml-sycl.
#   GGML_SYCL_DEVICE_ARCH  Passed as --offload-arch; mandatory for the AMD target.
#   DNNL_GPU_VENDOR        Vendor oneDNN was built for; defaults to INTEL if undefined.
#   ENV{ONEAPI_ROOT}       Its presence selects the "oneAPI Release compiler" message.

message(STATUS "GGML_SYCL_TARGET=${GGML_SYCL_TARGET}")

if (NOT GGML_SYCL_TARGET MATCHES "^(INTEL|NVIDIA|AMD)$")
    message(FATAL_ERROR "Invalid backend chosen, supported options are INTEL, NVIDIA, or AMD")
endif()

check_cxx_compiler_flag("-fsycl" SUPPORTS_SYCL)

if (DEFINED ENV{ONEAPI_ROOT})
    message(STATUS "Using oneAPI Release SYCL compiler (icpx).")
elseif(SUPPORTS_SYCL)
    message(WARNING "Using open-source SYCL compiler (clang++). Didn't detect ENV {ONEAPI_ROOT}. If you expected the oneAPI Release compiler, please install oneAPI & source it, like: source /opt/intel/oneapi/setvars.sh")
else()
    message(FATAL_ERROR "C++ compiler lacks SYCL support.")
endif()
message(STATUS "SYCL found")
# TODO: AOT

ggml_add_backend_library(ggml-sycl
                         ggml-sycl.cpp
                         ../../include/ggml-sycl.h
                        )

# Every .hpp/.cpp file in this directory is part of the backend.
file(GLOB GGML_HEADERS_SYCL "*.hpp")
file(GLOB GGML_SOURCES_SYCL "*.cpp")
target_sources(ggml-sycl PRIVATE ${GGML_HEADERS_SYCL} ${GGML_SOURCES_SYCL})

if (WIN32)
    # To generate a Visual Studio solution, using Intel C++ Compiler for ggml-sycl is mandatory.
    # Both operands are quoted on purpose: an unquoted, empty ${CMAKE_GENERATOR_TOOLSET}
    # expands to zero arguments and turns the condition into a malformed expression.
    if ("${CMAKE_GENERATOR}" MATCHES "Visual Studio" AND NOT ("${CMAKE_GENERATOR_TOOLSET}" MATCHES "Intel C"))
        set_target_properties(ggml-sycl PROPERTIES VS_PLATFORM_TOOLSET "Intel C++ Compiler 2025")
        set(CMAKE_CXX_COMPILER "icx")
        set(CMAKE_CXX_COMPILER_ID "IntelLLVM")
    endif()
endif()

find_package(IntelSYCL)
if (IntelSYCL_FOUND)
    # Use oneAPI CMake when possible
    target_link_libraries(ggml-sycl PRIVATE IntelSYCL::SYCL_CXX)
else()
    # Fallback to the simplest way of enabling SYCL when using intel/llvm nightly for instance
    target_compile_options(ggml-sycl PRIVATE "-fsycl")
    target_link_options(ggml-sycl PRIVATE "-fsycl")
endif()

target_compile_options(ggml-sycl PRIVATE "-Wno-narrowing")

# Link against oneDNN.
# GGML_SYCL_DNNL ends up 1 only when oneDNN was requested, found, and built for
# the same vendor as GGML_SYCL_TARGET; it is always passed to the sources as 0 or 1.
set(GGML_SYCL_DNNL 0)
if(GGML_SYCL_DNN)
    find_package(DNNL)
    if(DNNL_FOUND)
        if (NOT DEFINED DNNL_GPU_VENDOR)
            # default to intel target
            set(DNNL_GPU_VENDOR "INTEL")
            if(NOT "${GGML_SYCL_TARGET}" STREQUAL "INTEL")
                message(WARNING "oneDNN builds bundled with oneapi release only support INTEL target")
            endif()
        endif()

        # Verify oneDNN was compiled for the same target as llama
        if("${GGML_SYCL_TARGET}" STREQUAL "${DNNL_GPU_VENDOR}")
            target_link_libraries(ggml-sycl PRIVATE DNNL::dnnl)
            set(GGML_SYCL_DNNL 1)
            # Report the library file of every imported configuration of DNNL::dnnl.
            get_target_property(CONFIGS DNNL::dnnl IMPORTED_CONFIGURATIONS)
            foreach(CONFIG ${CONFIGS})
                get_target_property(DNNL_LIB DNNL::dnnl IMPORTED_LOCATION_${CONFIG})
                message(STATUS "Found oneDNN: ${DNNL_LIB}")
            endforeach()
        else()
            message(WARNING
                "oneDNN must be compiled for the same target as llama.cpp. llama.cpp: ${GGML_SYCL_TARGET}, oneDNN: ${DNNL_GPU_VENDOR}. Disabling oneDNN support.")
        endif()
    else()
        message(STATUS "oneDNN not found, disabling oneDNN support")
    endif()
else()
    message(STATUS "oneDNN support disabled by the user")
endif()
target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_DNNL=${GGML_SYCL_DNNL})

# The definitions below are attached to the ggml-sycl target rather than to the
# directory, so they do not propagate into the oneMath sub-build that may be
# added further down through FetchContent.
if (GGML_SYCL_F16)
    if (GGML_SYCL_TARGET STREQUAL "AMD")
        message(WARNING "AMD target does not entirely support FP16 in the SYCL backend.")
    endif()
    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_F16)
endif()

if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_WARP_SIZE=32)
elseif (GGML_SYCL_TARGET STREQUAL "AMD")
    # INFO: Allowed Sub_group_sizes are not consistent through all
    # hip targets. For example, 64 is used for certain models, but the backend
    # does not support it.
    # Target archs tested working: gfx1030, gfx1031, (Only tested sub_group_size = 32)
    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_WARP_SIZE=32)
else()
    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_WARP_SIZE=16)
endif()

if (GGML_SYCL_GRAPH)
    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_GRAPH)
endif()

# Link against Intel oneMKL or oneMath
if (GGML_SYCL_TARGET STREQUAL "INTEL")
    # Intel devices use Intel oneMKL directly instead of oneMath to avoid the limitation of linking Intel oneMKL statically
    # See https://github.com/uxlfoundation/oneMath/issues/654
    if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
        set(SYCL_COMPILER ON)
    endif()
    find_package(MKL REQUIRED)
    target_link_libraries(ggml-sycl PRIVATE MKL::MKL_SYCL::BLAS)
    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_USE_INTEL_ONEMKL)
else()
    find_package(oneMath QUIET)
    if (NOT oneMath_FOUND)
        message(STATUS "oneMath not found: oneMath will be automatically downloaded")
        # Use FetchContent to automatically pull and build oneMath
        include(FetchContent)
        set(BUILD_FUNCTIONAL_TESTS False)
        set(BUILD_EXAMPLES False)
        set(TARGET_DOMAINS blas)
        if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
            set(ENABLE_MKLCPU_BACKEND False)
            set(ENABLE_MKLGPU_BACKEND False)
            set(ENABLE_CUBLAS_BACKEND True)
        elseif (GGML_SYCL_TARGET STREQUAL "AMD")
            set(ENABLE_MKLCPU_BACKEND False)
            set(ENABLE_MKLGPU_BACKEND False)
            set(ENABLE_ROCBLAS_BACKEND True)
            # Ensure setting a string variable here is not overridden by oneMath CACHE variables
            cmake_policy(SET CMP0126 NEW)
            # Setting the device architecture is only needed and useful for AMD devices in oneMath
            set(HIP_TARGETS ${GGML_SYCL_DEVICE_ARCH} CACHE STRING "oneMath HIP target" FORCE)
        endif()
        FetchContent_Declare(
            ONEMATH
            GIT_REPOSITORY https://github.com/uxlfoundation/oneMath.git
            GIT_TAG 8efe85f5aaebb37f1d8c503b7af66315feabf142
        )
        FetchContent_MakeAvailable(ONEMATH)

        # onemath_alias(<target>)
        #
        # Create alias to match with find_package targets name: if <target>
        # exists, ONEMATH::<target> is added as an alias for it. If a target
        # named <target>_obj exists, its compiler warnings are disabled (-w).
        # Targets that do not exist are skipped silently.
        function(onemath_alias target)
            if (TARGET ${target}_obj)
                # Silence verbose warnings from external libraries
                target_compile_options(${target}_obj PRIVATE -w)
            endif()
            if (TARGET ${target})
                add_library(ONEMATH::${target} ALIAS ${target})
            endif()
        endfunction()

        onemath_alias(onemath)
        onemath_alias(onemath_blas_mklcpu)
        onemath_alias(onemath_blas_mklgpu)
        onemath_alias(onemath_blas_cublas)
        onemath_alias(onemath_blas_rocblas)
    endif()

    # Below oneMath compile-time dispatching is used for better performance
    if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
        target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath_blas_cublas)
        target_compile_options(ggml-sycl PRIVATE "-fsycl-targets=nvptx64-nvidia-cuda")
        target_link_options(ggml-sycl PRIVATE "-fsycl-targets=nvptx64-nvidia-cuda")
        target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_NVIDIA)
    elseif (GGML_SYCL_TARGET STREQUAL "AMD")
        if (NOT GGML_SYCL_DEVICE_ARCH)
            message(FATAL_ERROR "Can't enable SYCL hip backend, GGML_SYCL_DEVICE_ARCH has not been set.")
        endif()
        target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath_blas_rocblas)
        target_compile_options(ggml-sycl PRIVATE "-fsycl-targets=amdgcn-amd-amdhsa")
        target_link_options(ggml-sycl PRIVATE "-fsycl-targets=amdgcn-amd-amdhsa")
        target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_AMD)
    else()
        # Fallback to oneMath runtime dispatcher
        target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath)
        target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_GENERIC)
    endif()
endif()

# Forward the requested device architecture to both the compile and link steps.
if (GGML_SYCL_DEVICE_ARCH)
    target_compile_options(ggml-sycl PRIVATE -Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH})
    target_link_options(ggml-sycl PRIVATE -Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH})
endif()