diff --git a/Dockerfile b/Dockerfile index 7bdf8f3..7fd9f36 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,7 +9,7 @@ ENV PIP_DISABLE_PIP_VERSION_CHECK=on ENV PIP_DEFAULT_TIMEOUT=100 RUN apt-get update -RUN apt-get install -y python3 python3-pip python-dev build-essential python3-venv +RUN apt-get install -y python3 python3-pip python-dev build-essential python3-venv ffmpeg RUN mkdir -p /code ADD . /code diff --git a/README.md b/README.md index 9602a1b..cf745f1 100644 --- a/README.md +++ b/README.md @@ -23,11 +23,13 @@ This repo is ChatGPT re-created with GPT-3.5 LLM as Telegram Bot. **And it works You can deploy your own bot, or use mine: [@chatgpt_karfly_bot](https://t.me/chatgpt_karfly_bot) ## News +- *8 Mar 2023*: Added voice message recognition with [OpenAI Whisper API](https://openai.com/blog/introducing-chatgpt-and-whisper-apis). Record a voice message and ChatGPT will answer you! - *2 Mar 2023*: Added support of [ChatGPT API](https://platform.openai.com/docs/guides/chat/introduction). It's enabled by default and can be disabled with `use_chatgpt_api` option in config. Don't forget to **rebuild** you docker image (`--build`). ## Features - Low latency replies (it usually takes about 3-5 seconds) - No request limits +- Voice message recognition - Code highlighting - Special chat modes: 👩🏼‍🎓 Assistant, 👩🏼‍💻 Code Assistant, 🎬 Movie Expert. 
More soon - Support of [ChatGPT API](https://platform.openai.com/docs/guides/chat/introduction) diff --git a/bot/bot.py b/bot/bot.py index ccf3396..6c220d1 100644 --- a/bot/bot.py +++ b/bot/bot.py @@ -3,6 +3,9 @@ import logging import traceback import html import json +import tempfile +import pydub +from pathlib import Path from datetime import datetime import telegram @@ -19,7 +22,7 @@ from telegram.constants import ParseMode, ChatAction import config import database -import chatgpt +import openai_utils # setup @@ -109,7 +112,7 @@ async def message_handle(update: Update, context: CallbackContext, message=None, try: message = message or update.message.text - chatgpt_instance = chatgpt.ChatGPT(use_chatgpt_api=config.use_chatgpt_api) + chatgpt_instance = openai_utils.ChatGPT(use_chatgpt_api=config.use_chatgpt_api) answer, n_used_tokens, n_first_dialog_messages_removed = await chatgpt_instance.send_message( message, dialog_messages=db.get_dialog_messages(user_id, dialog_id=None), @@ -147,6 +150,42 @@ async def message_handle(update: Update, context: CallbackContext, message=None, await update.message.reply_text(answer) +async def voice_message_handle(update: Update, context: CallbackContext): + await register_user_if_not_exists(update, context, update.message.from_user) + user_id = update.message.from_user.id + db.set_user_attribute(user_id, "last_interaction", datetime.now()) + + voice = update.message.voice + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_dir = Path(tmp_dir) + voice_ogg_path = tmp_dir / "voice.ogg" + + # download + voice_file = await context.bot.get_file(voice.file_id) + await voice_file.download_to_drive(voice_ogg_path) + + # convert to mp3 + voice_mp3_path = tmp_dir / "voice.mp3" + pydub.AudioSegment.from_file(voice_ogg_path).export(voice_mp3_path, format="mp3") + + # transcribe + with open(voice_mp3_path, "rb") as f: + transcribed_text = await openai_utils.transcribe_audio(f) + + text = f"🎤: {html.escape(transcribed_text)}" # escape the transcript: this reply is sent with ParseMode.HTML, so raw <, > or & would break parsing + await 
update.message.reply_text(text, parse_mode=ParseMode.HTML) + + await message_handle(update, context, message=transcribed_text) + + # calculate spent dollars + n_spent_dollars = voice.duration * (config.whisper_price_per_1_min / 60) + + # normalize dollars to tokens (it's very convenient to measure everything in a single unit) + price_per_1000_tokens = config.chatgpt_price_per_1000_tokens if config.use_chatgpt_api else config.gpt_price_per_1000_tokens + n_used_tokens = int(n_spent_dollars / (price_per_1000_tokens / 1000)) + db.set_user_attribute(user_id, "n_used_tokens", n_used_tokens + db.get_user_attribute(user_id, "n_used_tokens")) + + async def new_dialog_handle(update: Update, context: CallbackContext): await register_user_if_not_exists(update, context, update.message.from_user) user_id = update.message.from_user.id @@ -156,7 +195,7 @@ async def new_dialog_handle(update: Update, context: CallbackContext): await update.message.reply_text("Starting new dialog ✅") chat_mode = db.get_user_attribute(user_id, "current_chat_mode") - await update.message.reply_text(f"{chatgpt.CHAT_MODES[chat_mode]['welcome_message']}", parse_mode=ParseMode.HTML) + await update.message.reply_text(f"{openai_utils.CHAT_MODES[chat_mode]['welcome_message']}", parse_mode=ParseMode.HTML) async def show_chat_modes_handle(update: Update, context: CallbackContext): @@ -165,7 +204,7 @@ async def show_chat_modes_handle(update: Update, context: CallbackContext): db.set_user_attribute(user_id, "last_interaction", datetime.now()) keyboard = [] - for chat_mode, chat_mode_dict in chatgpt.CHAT_MODES.items(): + for chat_mode, chat_mode_dict in openai_utils.CHAT_MODES.items(): keyboard.append([InlineKeyboardButton(chat_mode_dict["name"], callback_data=f"set_chat_mode|{chat_mode}")]) reply_markup = InlineKeyboardMarkup(keyboard) @@ -185,11 +224,11 @@ async def set_chat_mode_handle(update: Update, context: CallbackContext): db.start_new_dialog(user_id) await query.edit_message_text( - 
f"{chatgpt.CHAT_MODES[chat_mode]['name']} chat mode is set", + f"{openai_utils.CHAT_MODES[chat_mode]['name']} chat mode is set", parse_mode=ParseMode.HTML ) - await query.edit_message_text(f"{chatgpt.CHAT_MODES[chat_mode]['welcome_message']}", parse_mode=ParseMode.HTML) + await query.edit_message_text(f"{openai_utils.CHAT_MODES[chat_mode]['welcome_message']}", parse_mode=ParseMode.HTML) async def show_balance_handle(update: Update, context: CallbackContext): @@ -200,11 +239,15 @@ async def show_balance_handle(update: Update, context: CallbackContext): n_used_tokens = db.get_user_attribute(user_id, "n_used_tokens") - price = 0.002 if config.use_chatgpt_api else 0.02 - n_spent_dollars = n_used_tokens * (price / 1000) + price_per_1000_tokens = config.chatgpt_price_per_1000_tokens if config.use_chatgpt_api else config.gpt_price_per_1000_tokens + n_spent_dollars = n_used_tokens * (price_per_1000_tokens / 1000) text = f"You spent {n_spent_dollars:.03f}$\n" - text += f"You used {n_used_tokens} tokens (price: {price}$ per 1000 tokens)\n" + text += f"You used {n_used_tokens} tokens\n\n" + + text += "🏷️ Prices\n" + text += f"- ChatGPT: {price_per_1000_tokens}$ per 1000 tokens\n" + text += f"- Whisper (voice recognition): {config.whisper_price_per_1_min}$ per 1 minute" await update.message.reply_text(text, parse_mode=ParseMode.HTML) @@ -257,6 +300,8 @@ def run_bot() -> None: application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND & user_filter, message_handle)) application.add_handler(CommandHandler("retry", retry_handle, filters=user_filter)) application.add_handler(CommandHandler("new", new_dialog_handle, filters=user_filter)) + + application.add_handler(MessageHandler(filters.VOICE & user_filter, voice_message_handle)) application.add_handler(CommandHandler("mode", show_chat_modes_handle, filters=user_filter)) application.add_handler(CallbackQueryHandler(set_chat_mode_handle, pattern="^set_chat_mode")) diff --git a/bot/config.py b/bot/config.py index 
4b6c0dc..cc7cb6c 100644 --- a/bot/config.py +++ b/bot/config.py @@ -18,3 +18,8 @@ use_chatgpt_api = config_yaml.get("use_chatgpt_api", True) allowed_telegram_usernames = config_yaml["allowed_telegram_usernames"] new_dialog_timeout = config_yaml["new_dialog_timeout"] mongodb_uri = f"mongodb://mongo:{config_env['MONGODB_PORT']}" + +# prices +chatgpt_price_per_1000_tokens = config_yaml.get("chatgpt_price_per_1000_tokens", 0.002) +gpt_price_per_1000_tokens = config_yaml.get("gpt_price_per_1000_tokens", 0.02) +whisper_price_per_1_min = config_yaml.get("whisper_price_per_1_min", 0.006) diff --git a/bot/chatgpt.py b/bot/openai_utils.py similarity index 97% rename from bot/chatgpt.py rename to bot/openai_utils.py index 5de94e6..88fa055 100644 --- a/bot/chatgpt.py +++ b/bot/openai_utils.py @@ -112,4 +112,9 @@ class ChatGPT: def _postprocess_answer(self, answer): answer = answer.strip() - return answer \ No newline at end of file + return answer + + +async def transcribe_audio(audio_file): # pass the given audio file object to the "whisper-1" model and return the "text" field of the response + transcription = await openai.Audio.atranscribe("whisper-1", audio_file) + return transcription["text"] \ No newline at end of file diff --git a/config/config.example.yml b/config/config.example.yml index 725429b..9a44c85 100644 --- a/config/config.example.yml +++ b/config/config.example.yml @@ -2,4 +2,8 @@ telegram_token: "" openai_api_key: "" use_chatgpt_api: true allowed_telegram_usernames: [] # if empty, the bot is available to anyone -new_dialog_timeout: 600 # new dialog starts after timeout (in seconds) \ No newline at end of file +new_dialog_timeout: 600 # new dialog starts after timeout (in seconds) + +chatgpt_price_per_1000_tokens: 0.002 +gpt_price_per_1000_tokens: 0.02 +whisper_price_per_1_min: 0.006 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index e4bed0f..c6d4c3d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ python-telegram-bot==20.1 openai>=0.27.0 PyYAML==6.0 pymongo==4.3.3 -python-dotenv==0.21.0 \ No newline at end of file 
+python-dotenv==0.21.0 +pydub==0.25.1 \ No newline at end of file