From e421be1634366c5a5d92f1e7e97e3c463103b7a2 Mon Sep 17 00:00:00 2001 From: Karim Iskakov Date: Fri, 21 Apr 2023 12:37:06 +0300 Subject: [PATCH] DALLE 2 --- bot/bot.py | 68 ++++++++++++++++++++++++++++++++++++++----- bot/config.py | 1 + bot/database.py | 1 + bot/openai_utils.py | 23 ++++++++++----- config/chat_modes.yml | 6 +++- config/models.yml | 4 +++ 6 files changed, 88 insertions(+), 15 deletions(-) diff --git a/bot/bot.py b/bot/bot.py index cefb14b..2b70cd3 100644 --- a/bot/bot.py +++ b/bot/bot.py @@ -8,6 +8,7 @@ import tempfile import pydub from pathlib import Path from datetime import datetime +import openai import telegram from telegram import ( @@ -91,6 +92,10 @@ async def register_user_if_not_exists(update: Update, context: CallbackContext, if db.get_user_attribute(user.id, "n_transcribed_seconds") is None: db.set_user_attribute(user.id, "n_transcribed_seconds", 0.0) + # image generation + if db.get_user_attribute(user.id, "n_generated_images") is None: + db.set_user_attribute(user.id, "n_generated_images", 0) + async def start_handle(update: Update, context: CallbackContext): await register_user_if_not_exists(update, context, update.message.from_user) @@ -142,14 +147,18 @@ async def message_handle(update: Update, context: CallbackContext, message=None, if await is_previous_message_not_answered_yet(update, context): return user_id = update.message.from_user.id - async def message_handle_fn(): - chat_mode = db.get_user_attribute(user_id, "current_chat_mode") + chat_mode = db.get_user_attribute(user_id, "current_chat_mode") + if chat_mode == "artist": + await generate_image_handle(update, context, message=message) + return + + async def message_handle_fn(): # new dialog timeout if use_new_dialog_timeout: if (datetime.now() - db.get_user_attribute(user_id, "last_interaction")).seconds > config.new_dialog_timeout and len(db.get_dialog_messages(user_id)) > 0: db.start_new_dialog(user_id) - await update.message.reply_text(f"Starting new dialog due to timeout 
({openai_utils.CHAT_MODES[chat_mode]['name']} mode) βœ…", parse_mode=ParseMode.HTML) + await update.message.reply_text(f"Starting new dialog due to timeout ({config.chat_modes[chat_mode]['name']} mode) βœ…", parse_mode=ParseMode.HTML) db.set_user_attribute(user_id, "last_interaction", datetime.now()) # in case of CancelledError @@ -164,12 +173,15 @@ async def message_handle(update: Update, context: CallbackContext, message=None, await update.message.chat.send_action(action="typing") _message = message or update.message.text + if _message is None or len(_message) == 0: + await update.message.reply_text("πŸ₯² You sent empty message. Please, try again!", parse_mode=ParseMode.HTML) + return dialog_messages = db.get_dialog_messages(user_id, dialog_id=None) parse_mode = { "html": ParseMode.HTML, "markdown": ParseMode.MARKDOWN - }[openai_utils.CHAT_MODES[chat_mode]["parse_mode"]] + }[config.chat_modes[chat_mode]["parse_mode"]] chatgpt_instance = openai_utils.ChatGPT(model=current_model) if config.enable_message_streaming: @@ -289,6 +301,9 @@ async def voice_message_handle(update: Update, context: CallbackContext): with open(voice_mp3_path, "rb") as f: transcribed_text = await openai_utils.transcribe_audio(f) + if transcribed_text is None: + transcribed_text = "" + text = f"🎀: {transcribed_text}" await update.message.reply_text(text, parse_mode=ParseMode.HTML) @@ -298,6 +313,35 @@ async def voice_message_handle(update: Update, context: CallbackContext): await message_handle(update, context, message=transcribed_text) +async def generate_image_handle(update: Update, context: CallbackContext, message=None): + await register_user_if_not_exists(update, context, update.message.from_user) + if await is_previous_message_not_answered_yet(update, context): return + + user_id = update.message.from_user.id + db.set_user_attribute(user_id, "last_interaction", datetime.now()) + + await update.message.chat.send_action(action="upload_photo") + + message = message or update.message.text + 
+ try: + image_urls = await openai_utils.generate_images(message, n_images=config.return_n_generated_images) + except openai.error.InvalidRequestError as e: + if str(e).startswith("Your request was rejected as a result of our safety system"): + text = "πŸ₯² Your request doesn't comply with OpenAI's usage policies.\nWhat did you write there, huh?" + await update.message.reply_text(text, parse_mode=ParseMode.HTML) + return + else: + raise + + # token usage + db.set_user_attribute(user_id, "n_generated_images", config.return_n_generated_images + db.get_user_attribute(user_id, "n_generated_images")) + + for i, image_url in enumerate(image_urls): + await update.message.chat.send_action(action="upload_photo") + await update.message.reply_photo(image_url, parse_mode=ParseMode.HTML) + + async def new_dialog_handle(update: Update, context: CallbackContext): await register_user_if_not_exists(update, context, update.message.from_user) if await is_previous_message_not_answered_yet(update, context): return @@ -309,7 +353,7 @@ async def new_dialog_handle(update: Update, context: CallbackContext): await update.message.reply_text("Starting new dialog βœ…") chat_mode = db.get_user_attribute(user_id, "current_chat_mode") - await update.message.reply_text(f"{openai_utils.CHAT_MODES[chat_mode]['welcome_message']}", parse_mode=ParseMode.HTML) + await update.message.reply_text(f"{config.chat_modes[chat_mode]['welcome_message']}", parse_mode=ParseMode.HTML) async def cancel_handle(update: Update, context: CallbackContext): @@ -333,7 +377,7 @@ async def show_chat_modes_handle(update: Update, context: CallbackContext): db.set_user_attribute(user_id, "last_interaction", datetime.now()) keyboard = [] - for chat_mode, chat_mode_dict in openai_utils.CHAT_MODES.items(): + for chat_mode, chat_mode_dict in config.chat_modes.items(): keyboard.append([InlineKeyboardButton(chat_mode_dict["name"], callback_data=f"set_chat_mode|{chat_mode}")]) reply_markup = InlineKeyboardMarkup(keyboard) @@ -352,7 
+396,7 @@ async def set_chat_mode_handle(update: Update, context: CallbackContext): db.set_user_attribute(user_id, "current_chat_mode", chat_mode) db.start_new_dialog(user_id) - await query.edit_message_text(f"{openai_utils.CHAT_MODES[chat_mode]['welcome_message']}", parse_mode=ParseMode.HTML) + await query.edit_message_text(f"{config.chat_modes[chat_mode]['welcome_message']}", parse_mode=ParseMode.HTML) def get_settings_menu(user_id: int): @@ -422,6 +466,7 @@ async def show_balance_handle(update: Update, context: CallbackContext): total_n_used_tokens = 0 n_used_tokens_dict = db.get_user_attribute(user_id, "n_used_tokens") + n_generated_images = db.get_user_attribute(user_id, "n_generated_images") n_transcribed_seconds = db.get_user_attribute(user_id, "n_transcribed_seconds") details_text = "🏷️ Details:\n" @@ -435,12 +480,21 @@ async def show_balance_handle(update: Update, context: CallbackContext): details_text += f"- {model_key}: {n_input_spent_dollars + n_output_spent_dollars:.03f}$ / {n_input_tokens + n_output_tokens} tokens\n" + # image generation + image_generation_n_spent_dollars = config.models["info"]["dalle-2"]["price_per_1_image"] * n_generated_images + if n_generated_images != 0: + details_text += f"- DALLΒ·E 2 (image generation): {image_generation_n_spent_dollars:.03f}$ / {n_generated_images} generated images\n" + + total_n_spent_dollars += image_generation_n_spent_dollars + + # voice recognition voice_recognition_n_spent_dollars = config.models["info"]["whisper"]["price_per_1_min"] * (n_transcribed_seconds / 60) if n_transcribed_seconds != 0: details_text += f"- Whisper (voice recognition): {voice_recognition_n_spent_dollars:.03f}$ / {n_transcribed_seconds:.01f} seconds\n" total_n_spent_dollars += voice_recognition_n_spent_dollars + text = f"You spent {total_n_spent_dollars:.03f}$\n" text += f"You used {total_n_used_tokens} tokens\n\n" text += details_text diff --git a/bot/config.py b/bot/config.py index 5b6391f..2fb4c9a 100644 --- a/bot/config.py +++ 
b/bot/config.py @@ -18,6 +18,7 @@ use_chatgpt_api = config_yaml.get("use_chatgpt_api", True) allowed_telegram_usernames = config_yaml["allowed_telegram_usernames"] new_dialog_timeout = config_yaml["new_dialog_timeout"] enable_message_streaming = config_yaml.get("enable_message_streaming", True) +return_n_generated_images = config_yaml.get("return_n_generated_images", 1) mongodb_uri = f"mongodb://mongo:{config_env['MONGODB_PORT']}" # chat_modes diff --git a/bot/database.py b/bot/database.py index 92cb566..b6bafe3 100644 --- a/bot/database.py +++ b/bot/database.py @@ -49,6 +49,7 @@ class Database: "n_used_tokens": {}, + "n_generated_images": 0, "n_transcribed_seconds": 0.0 # voice message transcription } diff --git a/bot/openai_utils.py b/bot/openai_utils.py index 2b0837a..423a3a9 100644 --- a/bot/openai_utils.py +++ b/bot/openai_utils.py @@ -5,8 +5,6 @@ import openai openai.api_key = config.openai_api_key -CHAT_MODES = config.chat_modes - OPENAI_COMPLETION_OPTIONS = { "temperature": 0.7, "max_tokens": 1000, @@ -22,7 +20,7 @@ class ChatGPT: self.model = model async def send_message(self, message, dialog_messages=[], chat_mode="assistant"): - if chat_mode not in CHAT_MODES.keys(): + if chat_mode not in config.chat_modes.keys(): raise ValueError(f"Chat mode {chat_mode} is not supported") n_dialog_messages_before = len(dialog_messages) @@ -62,7 +60,7 @@ class ChatGPT: return answer, (n_input_tokens, n_output_tokens), n_first_dialog_messages_removed async def send_message_stream(self, message, dialog_messages=[], chat_mode="assistant"): - if chat_mode not in CHAT_MODES.keys(): + if chat_mode not in config.chat_modes.keys(): raise ValueError(f"Chat mode {chat_mode} is not supported") n_dialog_messages_before = len(dialog_messages) @@ -114,7 +112,7 @@ class ChatGPT: yield "finished", answer, (n_input_tokens, n_output_tokens), n_first_dialog_messages_removed # sending final answer def _generate_prompt(self, message, dialog_messages, chat_mode): - prompt = 
CHAT_MODES[chat_mode]["prompt_start"] + prompt = config.chat_modes[chat_mode]["prompt_start"] prompt += "\n\n" # add chat context @@ -131,7 +129,7 @@ class ChatGPT: return prompt def _generate_prompt_messages(self, message, dialog_messages, chat_mode): - prompt = CHAT_MODES[chat_mode]["prompt_start"] + prompt = config.chat_modes[chat_mode]["prompt_start"] messages = [{"role": "system", "content": prompt}] for dialog_message in dialog_messages: @@ -184,4 +182,15 @@ class ChatGPT: async def transcribe_audio(audio_file): r = await openai.Audio.atranscribe("whisper-1", audio_file) - return r["text"] \ No newline at end of file + return r["text"] + + +async def generate_images(prompt, n_images=4): + r = await openai.Image.acreate(prompt=prompt, n=n_images, size="512x512") + image_urls = [item.url for item in r.data] + return image_urls + + +async def is_content_acceptable(prompt): + r = await openai.Moderation.acreate(input=prompt) + return not any(r.results[0].categories.values()) \ No newline at end of file diff --git a/config/chat_modes.yml b/config/chat_modes.yml index 6b27851..e159d06 100644 --- a/config/chat_modes.yml +++ b/config/chat_modes.yml @@ -1,12 +1,12 @@ assistant: name: 👩🏼‍🎓 General Assistant + model_type: text welcome_message: 👩🏼‍🎓 Hi, I'm General Assistant. How can I help you? prompt_start: | As an advanced chatbot Assistant, your primary goal is to assist users to the best of your ability. This may involve answering questions, providing helpful information, or completing tasks based on user input. In order to effectively assist users, it is important to be detailed and thorough in your responses. Use examples and evidence to support your points and justify your recommendations or solutions. Remember to always prioritize the needs and satisfaction of the user. Your ultimate goal is to provide a helpful and enjoyable experience for the user.
If user asks you about programming or asks to write code do not answer his question, but be sure to advise him to switch to a special mode \"👩🏼‍💻 Code Assistant\" by sending the command /mode to chat. parse_mode: html - code_assistant: name: 👩🏼‍💻 Code Assistant welcome_message: 👩🏼‍💻 Hi, I'm Code Assistant. How can I help you? @@ -15,6 +15,10 @@ code_assistant: Format output in Markdown. parse_mode: markdown +artist: + name: 👩‍🎨 Artist + welcome_message: 👩‍🎨 Hi, I'm Artist. I'll draw anything you write me (e.g. Ginger cat selfie on Times Square, illustration) + text_improver: name: 📝 Text Improver welcome_message: 📝 Hi, I'm Text Improver. Send me any text – I'll improve it and correct all the mistakes diff --git a/config/models.yml b/config/models.yml index ac5ee7a..70f423b 100644 --- a/config/models.yml +++ b/config/models.yml @@ -40,6 +40,10 @@ info: Fast: 2 Cheap: 3 + dalle-2: + type: image + price_per_1_image: 0.018 # 512x512 + whisper: type: audio price_per_1_min: 0.006 \ No newline at end of file