Coverage for src/flag_gems/fused/__init__.py: 100%

47 statements  

« prev     ^ index     » next       coverage.py v7.6.9, created at 2026-05-27 08:02 +0800

1from flag_gems.fused.apply_repetition_penalties import apply_repetition_penalties 

2from flag_gems.fused.bincount import bincount 

3from flag_gems.fused.chunk_gated_delta_rule import chunk_gated_delta_rule 

4from flag_gems.fused.concat_and_cache_mla import concat_and_cache_mla 

5from flag_gems.fused.cp_gather_indexer_k_quant_cache import ( 

6 cp_gather_indexer_k_quant_cache, 

7) 

8from flag_gems.fused.cross_entropy_loss import cross_entropy_loss 

9from flag_gems.fused.cutlass_scaled_mm import cutlass_scaled_mm 

10from flag_gems.fused.deepseek_v4_attention_combine_topk_swa_indices import ( 

11 combine_topk_swa_indices, 

12) 

13from flag_gems.fused.deepseek_v4_attention_compute_global_topk_indices_and_lens import ( 

14 compute_global_topk_indices_and_lens, 

15) 

16from flag_gems.fused.deepseek_v4_attention_dequantize_and_gather_k_cache import ( 

17 dequantize_and_gather_k_cache, 

18) 

19from flag_gems.fused.deepseek_v4_attention_fused_q_kv_rmsnorm import fused_q_kv_rmsnorm 

20from flag_gems.fused.DSA.bin_topk import bucket_sort_topk 

21from flag_gems.fused.FLA import ( 

22 chunk_gated_delta_rule_fwd, 

23 fused_recurrent_gated_delta_rule_fwd, 

24) 

25from flag_gems.fused.flash_mla import flash_mla 

26from flag_gems.fused.flashmla_sparse import flash_mla_sparse_fwd 

27from flag_gems.fused.fused_add_rms_norm import fused_add_rms_norm 

28from flag_gems.fused.fused_deepseek_v4_qnorm_rope_kv_rope_quant_insert import ( 

29 fused_deepseek_v4_qnorm_rope_kv_rope_quant_insert, 

30) 

31from flag_gems.fused.fused_inv_rope_fp8_quant import fused_inv_rope_fp8_quant 

32from flag_gems.fused.fused_moe import ( 

33 dispatch_fused_moe_kernel, 

34 fused_experts_impl, 

35 inplace_fused_experts, 

36 invoke_fused_moe_triton_kernel, 

37 outplace_fused_experts, 

38) 

39from flag_gems.fused.geglu import dgeglu, geglu 

40from flag_gems.fused.gelu_and_mul import gelu_and_mul 

41from flag_gems.fused.grouped_topk import grouped_topk 

42from flag_gems.fused.indexer_k_quant_and_cache import indexer_k_quant_and_cache 

43from flag_gems.fused.instance_norm import instance_norm 

44from flag_gems.fused.mhc import ( 

45 hc_head_fused_kernel, 

46 hc_head_fused_kernel_ref, 

47 mhc_bwd, 

48 mhc_bwd_ref, 

49 mhc_post, 

50 mhc_pre, 

51 sinkhorn_forward, 

52) 

53from flag_gems.fused.moe_align_block_size import ( 

54 moe_align_block_size, 

55 moe_align_block_size_triton, 

56) 

57from flag_gems.fused.moe_sum import moe_sum 

58from flag_gems.fused.outer import outer 

59from flag_gems.fused.pack_seq import pack_seq_triton 

60from flag_gems.fused.reglu import dreglu, reglu 

61from flag_gems.fused.reshape_and_cache import reshape_and_cache 

62from flag_gems.fused.reshape_and_cache_flash import reshape_and_cache_flash 

63from flag_gems.fused.rotary_embedding import apply_rotary_pos_emb 

64from flag_gems.fused.rwkv_ka_fusion import rwkv_ka_fusion 

65from flag_gems.fused.rwkv_mm_sparsity import rwkv_mm_sparsity 

66from flag_gems.fused.silu_and_mul import silu_and_mul, silu_and_mul_out 

67from flag_gems.fused.silu_and_mul_with_clamp import ( 

68 silu_and_mul_with_clamp, 

69 silu_and_mul_with_clamp_out, 

70) 

71from flag_gems.fused.skip_layernorm import skip_layer_norm 

72from flag_gems.fused.sparse_attention import sparse_attn_triton 

73from flag_gems.fused.swiglu import dswiglu, swiglu 

74from flag_gems.fused.top_k_per_row_decode import top_k_per_row_decode 

75from flag_gems.fused.top_k_per_row_prefill import top_k_per_row_prefill 

76from flag_gems.fused.topk_softmax import topk_softmax 

77from flag_gems.fused.topk_softplus_sqrt import topk_softplus_sqrt 

78from flag_gems.fused.unpack_seq import unpack_seq_triton 

79from flag_gems.fused.weight_norm import weight_norm 

80 

81__all__ = [ 

82 "apply_repetition_penalties", 

83 "apply_rotary_pos_emb", 

84 "bincount", 

85 "bucket_sort_topk", 

86 "chunk_gated_delta_rule", 

87 "chunk_gated_delta_rule_fwd", 

88 "combine_topk_swa_indices", 

89 "compute_global_topk_indices_and_lens", 

90 "concat_and_cache_mla", 

91 "cp_gather_indexer_k_quant_cache", 

92 "cross_entropy_loss", 

93 "cutlass_scaled_mm", 

94 "dequantize_and_gather_k_cache", 

95 "dgeglu", 

96 "dispatch_fused_moe_kernel", 

97 "dreglu", 

98 "dswiglu", 

99 "flash_mla", 

100 "flash_mla_sparse_fwd", 

101 "fused_add_rms_norm", 

102 "fused_deepseek_v4_qnorm_rope_kv_rope_quant_insert", 

103 "fused_experts_impl", 

104 "fused_inv_rope_fp8_quant", 

105 "fused_q_kv_rmsnorm", 

106 "fused_recurrent_gated_delta_rule_fwd", 

107 "geglu", 

108 "gelu_and_mul", 

109 "grouped_topk", 

110 "hc_head_fused_kernel", 

111 "hc_head_fused_kernel_ref", 

112 "indexer_k_quant_and_cache", 

113 "inplace_fused_experts", 

114 "instance_norm", 

115 "invoke_fused_moe_triton_kernel", 

116 "mhc_bwd", 

117 "mhc_bwd_ref", 

118 "mhc_post", 

119 "mhc_pre", 

120 "moe_align_block_size", 

121 "moe_align_block_size_triton", 

122 "moe_sum", 

123 "outer", 

124 "outplace_fused_experts", 

125 "pack_seq_triton", 

126 "reglu", 

127 "reshape_and_cache", 

128 "reshape_and_cache_flash", 

129 "rwkv_ka_fusion", 

130 "rwkv_mm_sparsity", 

131 "silu_and_mul", 

132 "silu_and_mul_out", 

133 "silu_and_mul_with_clamp", 

134 "silu_and_mul_with_clamp_out", 

135 "sinkhorn_forward", 

136 "skip_layer_norm", 

137 "sparse_attn_triton", 

138 "swiglu", 

139 "top_k_per_row_decode", 

140 "top_k_per_row_prefill", 

141 "topk_softmax", 

142 "topk_softplus_sqrt", 

143 "unpack_seq_triton", 

144 "weight_norm", 

145]