# Copyright (c) OpenMMLab. All rights reserved.

# Static library `attention`: the hand-written sources plus the translation
# units under codegen/. The codegen file names follow the pattern
# <attention|decoding>_sm<NN>_<size>_<types>; presumably one kernel
# instantiation per file -- confirm against the sources before relying on it.
add_library(attention STATIC
    attention.cu
    decoding.cu
    reduce.cu
    kv_cache_utils_v2.cu
    utils.cc
    codegen/attention_sm70_128_f16.cu
    codegen/attention_sm75_128_f16.cu
    codegen/attention_sm80_128_bf16.cu
    codegen/attention_sm80_128_f16.cu
    codegen/decoding_sm70_128_f16_f16.cu
    codegen/decoding_sm70_128_f16_u4.cu
    codegen/decoding_sm70_128_f16_u8.cu
    codegen/decoding_sm75_128_f16_f16.cu
    codegen/decoding_sm75_128_f16_u4.cu
    codegen/decoding_sm75_128_f16_u8.cu
    codegen/decoding_sm80_128_bf16_bf16.cu
    codegen/decoding_sm80_128_bf16_u4.cu
    codegen/decoding_sm80_128_bf16_u8.cu
    codegen/decoding_sm80_128_f16_f16.cu
    codegen/decoding_sm80_128_f16_u4.cu
    codegen/decoding_sm80_128_f16_u8.cu
    codegen/attention_sm70_64_f16.cu
    codegen/attention_sm75_64_f16.cu
    codegen/attention_sm80_64_bf16.cu
    codegen/attention_sm80_64_f16.cu
    codegen/decoding_sm70_64_f16_f16.cu
    codegen/decoding_sm70_64_f16_u4.cu
    codegen/decoding_sm70_64_f16_u8.cu
    codegen/decoding_sm75_64_f16_f16.cu
    codegen/decoding_sm75_64_f16_u4.cu
    codegen/decoding_sm75_64_f16_u8.cu
    codegen/decoding_sm80_64_bf16_bf16.cu
    codegen/decoding_sm80_64_bf16_u4.cu
    codegen/decoding_sm80_64_bf16_u8.cu
    codegen/decoding_sm80_64_f16_f16.cu
    codegen/decoding_sm80_64_f16_u4.cu
    codegen/decoding_sm80_64_f16_u8.cu
    codegen/attention_sm80_192.cu
    codegen/decoding_sm80_192.cu
)

# Build position-independent objects and let CMake run the CUDA device-link
# step on this static library itself.
set_property(TARGET attention PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET attention PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)

# -O3 applies to every source in the target. The nvcc-only switches are
# restricted to CUDA sources because the target also compiles utils.cc with
# the host C++ compiler. The generator expression is quoted and uses ';' so
# it reaches CMake as a single argument carrying a list of flags.
target_compile_options(attention PRIVATE
    -O3
    "$<$<COMPILE_LANGUAGE:CUDA>:-use_fast_math;--expt-relaxed-constexpr>"
)

if (BUILD_TEST)
    # Extra diagnostics for test builds: verbose ptxas output and
    # source-line information for the device code of the library.
    target_compile_options(attention PRIVATE
        "$<$<COMPILE_LANGUAGE:CUDA>:-Xptxas=-v;--generate-line-info>"
    )

    # Test executable for the attention kernels. Every library it links
    # against other than `attention` is defined outside this file.
    add_executable(test_attention
        test_utils.cu
        test_attention.cu
        reference.cu
    )
    target_compile_options(test_attention PRIVATE
        --generate-line-info
        -O3
        -use_fast_math
        --expt-relaxed-constexpr
    )
    target_link_libraries(test_attention PRIVATE
        attention
        flash_attention
        Llama
        unfused_attention_kernels
        logger
        tensor
        cublas
    )

    # Test executable built from test_quant.cu and test_utils.cu; it links
    # no project libraries.
    add_executable(test_quant
        test_quant.cu
        test_utils.cu
    )
    target_compile_options(test_quant PRIVATE
        --generate-line-info
        -O3
        -use_fast_math
        --expt-relaxed-constexpr
    )
endif ()