You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
31 lines
1.1 KiB
31 lines
1.1 KiB
# tiktoken is an optional dependency: record whether the import succeeded so
# the rest of the module can check the flag before touching ``tiktoken``.
try:
    import tiktoken
except ImportError:
    TIKTOKEN_AVAILABLE = False
else:
    TIKTOKEN_AVAILABLE = True
|
|
|
|
def count_str_token(string: str) -> int:
    """Return the number of tokens in a text string.

    When tiktoken was imported successfully, the string is encoded with the
    encoding tiktoken selects for ``gpt-3.5-turbo`` and the length of the
    encoded sequence is returned. Otherwise the count is estimated as one
    token per four characters, rounded down.

    Args:
        string: Text to measure.

    Returns:
        The token count (from tiktoken when available, estimated otherwise).
    """
    if not TIKTOKEN_AVAILABLE:
        # Fallback heuristic: approximately 4 characters per token for English text
        return len(string) // 4

    encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
    # Arbitrary text may contain special-token markers such as
    # "<|endoftext|>". With its default arguments tiktoken raises ValueError
    # on them; disallowed_special=() encodes them as ordinary text instead,
    # so counting never fails on such input.
    return len(encoding.encode(string, disallowed_special=()))
|
|
|
|
def count_deepseek_tokens(text: str) -> int:
    """Approximate the DeepSeek token count of ``text``.

    No tokenizer is involved: the estimate is one token per four
    characters, rounded down.
    """
    chars_per_token = 4
    return len(text) // chars_per_token
|
|
|
|
def count_config_token(config) -> int:
    """Return the total token count over all messages in ``config``.

    Each message's ``'content'`` is measured with ``count_deepseek_tokens``
    when the config's ``'model'`` value starts with ``'deepseek'``, and with
    ``count_str_token`` otherwise; the per-message counts are added up.

    Args:
        config: Mapping with a ``'messages'`` list of mappings that each
            carry a ``'content'`` entry, and an optional ``'model'`` string.

    Returns:
        The summed token count; 0 when ``'messages'`` is empty.

    Raises:
        KeyError: If ``'messages'`` is missing from ``config`` or a message
            has no ``'content'`` key.
    """
    # ``or ''`` also covers a config whose 'model' key is present but None,
    # which would otherwise fail on ``.startswith``.
    model = config.get('model') or ''

    # Check if this is a DeepSeek config, and pick the counter once instead
    # of duplicating the loop per branch.
    if model.startswith('deepseek'):
        count_tokens = count_deepseek_tokens
    else:
        count_tokens = count_str_token

    # A None content is counted as empty text rather than crashing inside
    # the counter. The builtin sum() replaces the former local accumulator
    # that shadowed it.
    return sum(count_tokens(message['content'] or '') for message in config['messages'])