Coverage for src/flag_gems/fused/__init__.py: 100%
47 statements
« prev ^ index » next coverage.py v7.6.9, created at 2026-05-27 08:02 +0800
"""Package initializer for ``flag_gems.fused``.

Imports the public callables from the individual ``flag_gems.fused.*``
submodules and re-exports them at package level, so callers can write
``from flag_gems.fused import <name>`` without knowing which submodule
defines it.  The exported set is declared explicitly in ``__all__``.
"""

# NOTE: imports are listed in case-insensitive module-path order (which is why
# ``DSA`` and ``FLA`` sit between the ``deepseek_*`` and ``flash_*`` modules).
from flag_gems.fused.apply_repetition_penalties import apply_repetition_penalties
from flag_gems.fused.bincount import bincount
from flag_gems.fused.chunk_gated_delta_rule import chunk_gated_delta_rule
from flag_gems.fused.concat_and_cache_mla import concat_and_cache_mla
from flag_gems.fused.cp_gather_indexer_k_quant_cache import (
    cp_gather_indexer_k_quant_cache,
)
from flag_gems.fused.cross_entropy_loss import cross_entropy_loss
from flag_gems.fused.cutlass_scaled_mm import cutlass_scaled_mm
from flag_gems.fused.deepseek_v4_attention_combine_topk_swa_indices import (
    combine_topk_swa_indices,
)
from flag_gems.fused.deepseek_v4_attention_compute_global_topk_indices_and_lens import (
    compute_global_topk_indices_and_lens,
)
from flag_gems.fused.deepseek_v4_attention_dequantize_and_gather_k_cache import (
    dequantize_and_gather_k_cache,
)
from flag_gems.fused.deepseek_v4_attention_fused_q_kv_rmsnorm import fused_q_kv_rmsnorm
from flag_gems.fused.DSA.bin_topk import bucket_sort_topk
from flag_gems.fused.FLA import (
    chunk_gated_delta_rule_fwd,
    fused_recurrent_gated_delta_rule_fwd,
)
from flag_gems.fused.flash_mla import flash_mla
from flag_gems.fused.flashmla_sparse import flash_mla_sparse_fwd
from flag_gems.fused.fused_add_rms_norm import fused_add_rms_norm
from flag_gems.fused.fused_deepseek_v4_qnorm_rope_kv_rope_quant_insert import (
    fused_deepseek_v4_qnorm_rope_kv_rope_quant_insert,
)
from flag_gems.fused.fused_inv_rope_fp8_quant import fused_inv_rope_fp8_quant
from flag_gems.fused.fused_moe import (
    dispatch_fused_moe_kernel,
    fused_experts_impl,
    inplace_fused_experts,
    invoke_fused_moe_triton_kernel,
    outplace_fused_experts,
)
from flag_gems.fused.geglu import dgeglu, geglu
from flag_gems.fused.gelu_and_mul import gelu_and_mul
from flag_gems.fused.grouped_topk import grouped_topk
from flag_gems.fused.indexer_k_quant_and_cache import indexer_k_quant_and_cache
from flag_gems.fused.instance_norm import instance_norm
from flag_gems.fused.mhc import (
    hc_head_fused_kernel,
    hc_head_fused_kernel_ref,
    mhc_bwd,
    mhc_bwd_ref,
    mhc_post,
    mhc_pre,
    sinkhorn_forward,
)
from flag_gems.fused.moe_align_block_size import (
    moe_align_block_size,
    moe_align_block_size_triton,
)
from flag_gems.fused.moe_sum import moe_sum
from flag_gems.fused.outer import outer
from flag_gems.fused.pack_seq import pack_seq_triton
from flag_gems.fused.reglu import dreglu, reglu
from flag_gems.fused.reshape_and_cache import reshape_and_cache
from flag_gems.fused.reshape_and_cache_flash import reshape_and_cache_flash
from flag_gems.fused.rotary_embedding import apply_rotary_pos_emb
from flag_gems.fused.rwkv_ka_fusion import rwkv_ka_fusion
from flag_gems.fused.rwkv_mm_sparsity import rwkv_mm_sparsity
from flag_gems.fused.silu_and_mul import silu_and_mul, silu_and_mul_out
from flag_gems.fused.silu_and_mul_with_clamp import (
    silu_and_mul_with_clamp,
    silu_and_mul_with_clamp_out,
)
from flag_gems.fused.skip_layernorm import skip_layer_norm
from flag_gems.fused.sparse_attention import sparse_attn_triton
from flag_gems.fused.swiglu import dswiglu, swiglu
from flag_gems.fused.top_k_per_row_decode import top_k_per_row_decode
from flag_gems.fused.top_k_per_row_prefill import top_k_per_row_prefill
from flag_gems.fused.topk_softmax import topk_softmax
from flag_gems.fused.topk_softplus_sqrt import topk_softplus_sqrt
from flag_gems.fused.unpack_seq import unpack_seq_triton
from flag_gems.fused.weight_norm import weight_norm

# Public API of the package: controls what ``from flag_gems.fused import *``
# exposes.  Every entry corresponds to a name imported above, and the list is
# kept in alphabetical order -- when adding a new import, add its name here in
# the matching sorted position so the two stay in sync.
__all__ = [
    "apply_repetition_penalties",
    "apply_rotary_pos_emb",
    "bincount",
    "bucket_sort_topk",
    "chunk_gated_delta_rule",
    "chunk_gated_delta_rule_fwd",
    "combine_topk_swa_indices",
    "compute_global_topk_indices_and_lens",
    "concat_and_cache_mla",
    "cp_gather_indexer_k_quant_cache",
    "cross_entropy_loss",
    "cutlass_scaled_mm",
    "dequantize_and_gather_k_cache",
    "dgeglu",
    "dispatch_fused_moe_kernel",
    "dreglu",
    "dswiglu",
    "flash_mla",
    "flash_mla_sparse_fwd",
    "fused_add_rms_norm",
    "fused_deepseek_v4_qnorm_rope_kv_rope_quant_insert",
    "fused_experts_impl",
    "fused_inv_rope_fp8_quant",
    "fused_q_kv_rmsnorm",
    "fused_recurrent_gated_delta_rule_fwd",
    "geglu",
    "gelu_and_mul",
    "grouped_topk",
    "hc_head_fused_kernel",
    "hc_head_fused_kernel_ref",
    "indexer_k_quant_and_cache",
    "inplace_fused_experts",
    "instance_norm",
    "invoke_fused_moe_triton_kernel",
    "mhc_bwd",
    "mhc_bwd_ref",
    "mhc_post",
    "mhc_pre",
    "moe_align_block_size",
    "moe_align_block_size_triton",
    "moe_sum",
    "outer",
    "outplace_fused_experts",
    "pack_seq_triton",
    "reglu",
    "reshape_and_cache",
    "reshape_and_cache_flash",
    "rwkv_ka_fusion",
    "rwkv_mm_sparsity",
    "silu_and_mul",
    "silu_and_mul_out",
    "silu_and_mul_with_clamp",
    "silu_and_mul_with_clamp_out",
    "sinkhorn_forward",
    "skip_layer_norm",
    "sparse_attn_triton",
    "swiglu",
    "top_k_per_row_decode",
    "top_k_per_row_prefill",
    "topk_softmax",
    "topk_softplus_sqrt",
    "unpack_seq_triton",
    "weight_norm",
]