hpcaitech
diff --git a/‎tests/test_infer/test_config_and_struct.py‎
Lines changed: 3 additions & 0 deletions b/‎tests/test_infer/test_config_and_struct.py‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎tests/test_infer_ops/triton/kernel_utils.py‎ renamed to ‎tests/test_infer/test_ops/triton/kernel_utils.py‎ b/‎tests/test_infer_ops/triton/kernel_utils.py‎ renamed to ‎tests/test_infer/test_ops/triton/kernel_utils.py‎
diff --git a/‎tests/test_infer_ops/triton/test_context_attn_unpad.py‎ renamed to ‎tests/test_infer/test_ops/triton/test_context_attn_unpad.py‎
Lines changed: 1 addition & 1 deletion b/‎tests/test_infer_ops/triton/test_context_attn_unpad.py‎ renamed to ‎tests/test_infer/test_ops/triton/test_context_attn_unpad.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎tests/test_infer_ops/triton/test_decoding_attn.py‎ renamed to ‎tests/test_infer/test_ops/triton/test_decoding_attn.py‎
Lines changed: 1 addition & 1 deletion b/‎tests/test_infer_ops/triton/test_decoding_attn.py‎ renamed to ‎tests/test_infer/test_ops/triton/test_decoding_attn.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎tests/test_infer_ops/triton/test_fused_rotary_embedding.py‎ renamed to ‎tests/test_infer/test_ops/triton/test_fused_rotary_embedding.py‎ b/‎tests/test_infer_ops/triton/test_fused_rotary_embedding.py‎ renamed to ‎tests/test_infer/test_ops/triton/test_fused_rotary_embedding.py‎
diff --git a/‎tests/test_infer_ops/triton/test_kvcache_copy.py‎ renamed to ‎tests/test_infer/test_ops/triton/test_kvcache_copy.py‎
Lines changed: 1 addition & 1 deletion b/‎tests/test_infer_ops/triton/test_kvcache_copy.py‎ renamed to ‎tests/test_infer/test_ops/triton/test_kvcache_copy.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎tests/test_infer_ops/triton/test_rmsnorm_triton.py‎ renamed to ‎tests/test_infer/test_ops/triton/test_rmsnorm_triton.py‎ b/‎tests/test_infer_ops/triton/test_rmsnorm_triton.py‎ renamed to ‎tests/test_infer/test_ops/triton/test_rmsnorm_triton.py‎
diff --git a/‎tests/test_infer_ops/triton/test_rotary_embdding_unpad.py‎ renamed to ‎tests/test_infer/test_ops/triton/test_rotary_embdding_unpad.py‎
Lines changed: 1 addition & 1 deletion b/‎tests/test_infer_ops/triton/test_rotary_embdding_unpad.py‎ renamed to ‎tests/test_infer/test_ops/triton/test_rotary_embdding_unpad.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎tests/test_infer_ops/triton/test_xine_copy.py‎ renamed to ‎tests/test_infer/test_ops/triton/test_xine_copy.py‎ b/‎tests/test_infer_ops/triton/test_xine_copy.py‎ renamed to ‎tests/test_infer/test_ops/triton/test_xine_copy.py‎
@@ -63,6 +63,9 @@ def check_config_and_inference():
     batch.add_seqs([sequence])
     batch.add_seqs([sequence2, sequence3])
 
+    # Add a duplicate sequence to verify that it is not counted twice.
+    batch.add_seqs([sequence])
+
     assert batch.is_empty == False
     assert batch.get_batch_size() == 3
     batch.update_batch_tokens([1, 2, 3])
 
@@ -6,7 +6,7 @@
 from colossalai.inference.modeling.layers.attention import PagedAttention
 from colossalai.kernel.triton import context_attention_unpadded
 from colossalai.utils import get_current_device
-from tests.test_infer_ops.triton.kernel_utils import generate_caches_and_block_tables_v2, torch_attn_ref
+from tests.test_infer.test_ops.triton.kernel_utils import generate_caches_and_block_tables_v2, torch_attn_ref
 
 try:
     import triton  # noqa
 
@@ -4,7 +4,7 @@
 
 from colossalai.kernel.triton import flash_decoding_attention
 from colossalai.utils import get_current_device
-from tests.test_infer_ops.triton.kernel_utils import (
+from tests.test_infer.test_ops.triton.kernel_utils import (
     convert_kv_unpad_to_padded,
     generate_caches_and_block_tables_v2,
     prepare_padding_mask,
 
@@ -5,7 +5,7 @@
 from colossalai.inference.modeling.layers.attention import copy_to_cache
 from colossalai.kernel.triton import copy_kv_to_blocked_cache
 from colossalai.utils import get_current_device
-from tests.test_infer_ops.triton.kernel_utils import generate_caches_and_block_tables_v2, mock_alloc_single_token
+from tests.test_infer.test_ops.triton.kernel_utils import generate_caches_and_block_tables_v2, mock_alloc_single_token
 
 try:
     import triton  # noqa
 
@@ -4,7 +4,7 @@
 from transformers.models.llama.modeling_llama import LlamaRotaryEmbedding, apply_rotary_pos_emb
 
 from colossalai.kernel.triton import rotary_embedding
-from tests.test_infer_ops.triton.kernel_utils import mock_alloc_block_table_and_kvcache_v2
+from tests.test_infer.test_ops.triton.kernel_utils import mock_alloc_block_table_and_kvcache_v2
 
 try:
     import triton  # noqa