From e421be1634366c5a5d92f1e7e97e3c463103b7a2 Mon Sep 17 00:00:00 2001
From: Karim Iskakov <kar.iskakov@gmail.com>
Date: Fri, 21 Apr 2023 12:37:06 +0300
Subject: [PATCH] DALLE 2

---
 bot/bot.py            | 68 ++++++++++++++++++++++++++++++++++++++-----
 bot/config.py         |  1 +
 bot/database.py       |  1 +
 bot/openai_utils.py   | 23 ++++++++++-----
 config/chat_modes.yml |  6 +++-
 config/models.yml     |  4 +++
 6 files changed, 88 insertions(+), 15 deletions(-)
diff --git a/bot/bot.py b/bot/bot.py
index cefb14b..2b70cd3 100644
--- a/bot/bot.py
+++ b/bot/bot.py
@@ -8,6 +8,7 @@ import tempfile
 import pydub
 from pathlib import Path
 from datetime import datetime
+import openai
 
 import telegram
 from telegram import (
@@ -91,6 +92,10 @@ async def register_user_if_not_exists(update: Update, context: CallbackContext,
     if db.get_user_attribute(user.id, "n_transcribed_seconds") is None:
         db.set_user_attribute(user.id, "n_transcribed_seconds", 0.0)
 
+    # image generation
+    if db.get_user_attribute(user.id, "n_generated_images") is None:
+        db.set_user_attribute(user.id, "n_generated_images", 0)
+
 
 async def start_handle(update: Update, context: CallbackContext):
     await register_user_if_not_exists(update, context, update.message.from_user)
@@ -142,14 +147,18 @@ async def message_handle(update: Update, context: CallbackContext, message=None,
     if await is_previous_message_not_answered_yet(update, context): return
 
     user_id = update.message.from_user.id
-    async def message_handle_fn():
-        chat_mode = db.get_user_attribute(user_id, "current_chat_mode")
+    chat_mode = db.get_user_attribute(user_id, "current_chat_mode")
 
+    if chat_mode == "artist":
+        await generate_image_handle(update, context, message=message)
+        return
+
+    async def message_handle_fn():
         # new dialog timeout
         if use_new_dialog_timeout:
             if (datetime.now() - db.get_user_attribute(user_id, "last_interaction")).seconds > config.new_dialog_timeout and len(db.get_dialog_messages(user_id)) > 0:
                 db.start_new_dialog(user_id)
-                await update.message.reply_text(f"Starting new dialog due to timeout (<b>{openai_utils.CHAT_MODES[chat_mode]['name']}</b> mode) ✅", parse_mode=ParseMode.HTML)
+                await update.message.reply_text(f"Starting new dialog due to timeout (<b>{config.chat_modes[chat_mode]['name']}</b> mode) ✅", parse_mode=ParseMode.HTML)
         db.set_user_attribute(user_id, "last_interaction", datetime.now())
 
         # in case of CancelledError
@@ -164,12 +173,15 @@ async def message_handle(update: Update, context: CallbackContext, message=None,
             await update.message.chat.send_action(action="typing")
 
             _message = message or update.message.text
+            if not _message:  # None or "" – nothing to send to the model
+                await update.message.reply_text("🥲 You sent <b>empty message</b>. Please, try again!", parse_mode=ParseMode.HTML)
+                return
 
             dialog_messages = db.get_dialog_messages(user_id, dialog_id=None)
             parse_mode = {
                 "html": ParseMode.HTML,
                 "markdown": ParseMode.MARKDOWN
-            }[openai_utils.CHAT_MODES[chat_mode]["parse_mode"]]
+            }[config.chat_modes[chat_mode]["parse_mode"]]
 
             chatgpt_instance = openai_utils.ChatGPT(model=current_model)
             if config.enable_message_streaming:
@@ -289,6 +301,9 @@ async def voice_message_handle(update: Update, context: CallbackContext):
         with open(voice_mp3_path, "rb") as f:
             transcribed_text = await openai_utils.transcribe_audio(f)
 
+            if transcribed_text is None:
+                transcribed_text = ""
+
     text = f"🎤: <i>{transcribed_text}</i>"
     await update.message.reply_text(text, parse_mode=ParseMode.HTML)
 
@@ -298,6 +313,35 @@ async def voice_message_handle(update: Update, context: CallbackContext):
     await message_handle(update, context, message=transcribed_text)
 
 
+async def generate_image_handle(update: Update, context: CallbackContext, message=None):
+    """Generate images for the user's prompt and reply with each one as a photo."""
+    await register_user_if_not_exists(update, context, update.message.from_user)
+    if await is_previous_message_not_answered_yet(update, context): return
+
+    user_id = update.message.from_user.id
+    db.set_user_attribute(user_id, "last_interaction", datetime.now())
+
+    message = message or update.message.text
+    if not message:  # same guard as message_handle: never send an empty prompt to the image API
+        await update.message.reply_text("🥲 You sent <b>empty message</b>. Please, try again!", parse_mode=ParseMode.HTML)
+        return
+
+    await update.message.chat.send_action(action="upload_photo")
+    try:
+        image_urls = await openai_utils.generate_images(message, n_images=config.return_n_generated_images)
+    except openai.error.InvalidRequestError as e:
+        if not str(e).startswith("Your request was rejected as a result of our safety system"):
+            raise  # not a content-policy rejection: propagate to the caller
+        text = "🥲 Your request <b>doesn't comply</b> with OpenAI's usage policies.\nWhat did you write there, huh?"
+        await update.message.reply_text(text, parse_mode=ParseMode.HTML)
+        return
+
+    db.set_user_attribute(user_id, "n_generated_images", len(image_urls) + db.get_user_attribute(user_id, "n_generated_images"))  # count images actually returned
+    for image_url in image_urls:
+        await update.message.chat.send_action(action="upload_photo")
+        await update.message.reply_photo(image_url, parse_mode=ParseMode.HTML)
+
+
 async def new_dialog_handle(update: Update, context: CallbackContext):
     await register_user_if_not_exists(update, context, update.message.from_user)
     if await is_previous_message_not_answered_yet(update, context): return
@@ -309,7 +353,7 @@ async def new_dialog_handle(update: Update, context: CallbackContext):
     await update.message.reply_text("Starting new dialog ✅")
 
     chat_mode = db.get_user_attribute(user_id, "current_chat_mode")
-    await update.message.reply_text(f"{openai_utils.CHAT_MODES[chat_mode]['welcome_message']}", parse_mode=ParseMode.HTML)
+    await update.message.reply_text(f"{config.chat_modes[chat_mode]['welcome_message']}", parse_mode=ParseMode.HTML)
 
 
 async def cancel_handle(update: Update, context: CallbackContext):
@@ -333,7 +377,7 @@ async def show_chat_modes_handle(update: Update, context: CallbackContext):
     db.set_user_attribute(user_id, "last_interaction", datetime.now())
 
     keyboard = []
-    for chat_mode, chat_mode_dict in openai_utils.CHAT_MODES.items():
+    for chat_mode, chat_mode_dict in config.chat_modes.items():
         keyboard.append([InlineKeyboardButton(chat_mode_dict["name"], callback_data=f"set_chat_mode|{chat_mode}")])
     reply_markup = InlineKeyboardMarkup(keyboard)
 
@@ -352,7 +396,7 @@ async def set_chat_mode_handle(update: Update, context: CallbackContext):
     db.set_user_attribute(user_id, "current_chat_mode", chat_mode)
     db.start_new_dialog(user_id)
 
-    await query.edit_message_text(f"{openai_utils.CHAT_MODES[chat_mode]['welcome_message']}", parse_mode=ParseMode.HTML)
+    await query.edit_message_text(f"{config.chat_modes[chat_mode]['welcome_message']}", parse_mode=ParseMode.HTML)
 
 
 def get_settings_menu(user_id: int):
@@ -422,6 +466,7 @@ async def show_balance_handle(update: Update, context: CallbackContext):
     total_n_used_tokens = 0
 
     n_used_tokens_dict = db.get_user_attribute(user_id, "n_used_tokens")
+    n_generated_images = db.get_user_attribute(user_id, "n_generated_images")
     n_transcribed_seconds = db.get_user_attribute(user_id, "n_transcribed_seconds")
 
     details_text = "🏷️ Details:\n"
@@ -435,12 +480,21 @@ async def show_balance_handle(update: Update, context: CallbackContext):
 
         details_text += f"- {model_key}: <b>{n_input_spent_dollars + n_output_spent_dollars:.03f}$</b> / <b>{n_input_tokens + n_output_tokens} tokens</b>\n"
 
+    # image generation
+    image_generation_n_spent_dollars = config.models["info"]["dalle-2"]["price_per_1_image"] * n_generated_images
+    if n_generated_images != 0:
+        details_text += f"- DALL·E 2 (image generation): <b>{image_generation_n_spent_dollars:.03f}$</b> / <b>{n_generated_images} generated images</b>\n"
+
+    total_n_spent_dollars += image_generation_n_spent_dollars
+
+    # voice recognition
     voice_recognition_n_spent_dollars = config.models["info"]["whisper"]["price_per_1_min"] * (n_transcribed_seconds / 60)
     if n_transcribed_seconds != 0:
         details_text += f"- Whisper (voice recognition): <b>{voice_recognition_n_spent_dollars:.03f}$</b> / <b>{n_transcribed_seconds:.01f} seconds</b>\n"
 
     total_n_spent_dollars += voice_recognition_n_spent_dollars
 
+
     text = f"You spent <b>{total_n_spent_dollars:.03f}$</b>\n"
     text += f"You used <b>{total_n_used_tokens}</b> tokens\n\n"
     text += details_text
diff --git a/bot/config.py b/bot/config.py
index 5b6391f..2fb4c9a 100644
--- a/bot/config.py
+++ b/bot/config.py
@@ -18,6 +18,7 @@ use_chatgpt_api = config_yaml.get("use_chatgpt_api", True)
 allowed_telegram_usernames = config_yaml["allowed_telegram_usernames"]
 new_dialog_timeout = config_yaml["new_dialog_timeout"]
 enable_message_streaming = config_yaml.get("enable_message_streaming", True)
+return_n_generated_images = config_yaml.get("return_n_generated_images", 1)
 mongodb_uri = f"mongodb://mongo:{config_env['MONGODB_PORT']}"
 
 # chat_modes
diff --git a/bot/database.py b/bot/database.py
index 92cb566..b6bafe3 100644
--- a/bot/database.py
+++ b/bot/database.py
@@ -49,6 +49,7 @@ class Database:
 
             "n_used_tokens": {},
 
+            "n_generated_images": 0,
             "n_transcribed_seconds": 0.0  # voice message transcription
         }
 
diff --git a/bot/openai_utils.py b/bot/openai_utils.py
index 2b0837a..423a3a9 100644
--- a/bot/openai_utils.py
+++ b/bot/openai_utils.py
@@ -5,8 +5,6 @@ import openai
 openai.api_key = config.openai_api_key
 
 
-CHAT_MODES = config.chat_modes
-
 OPENAI_COMPLETION_OPTIONS = {
     "temperature": 0.7,
     "max_tokens": 1000,
@@ -22,7 +20,7 @@ class ChatGPT:
         self.model = model
 
     async def send_message(self, message, dialog_messages=[], chat_mode="assistant"):
-        if chat_mode not in CHAT_MODES.keys():
+        if chat_mode not in config.chat_modes.keys():
             raise ValueError(f"Chat mode {chat_mode} is not supported")
 
         n_dialog_messages_before = len(dialog_messages)
@@ -62,7 +60,7 @@ class ChatGPT:
         return answer, (n_input_tokens, n_output_tokens), n_first_dialog_messages_removed
 
     async def send_message_stream(self, message, dialog_messages=[], chat_mode="assistant"):
-        if chat_mode not in CHAT_MODES.keys():
+        if chat_mode not in config.chat_modes.keys():
             raise ValueError(f"Chat mode {chat_mode} is not supported")
 
         n_dialog_messages_before = len(dialog_messages)
@@ -114,7 +112,7 @@ class ChatGPT:
         yield "finished", answer, (n_input_tokens, n_output_tokens), n_first_dialog_messages_removed  # sending final answer
 
     def _generate_prompt(self, message, dialog_messages, chat_mode):
-        prompt = CHAT_MODES[chat_mode]["prompt_start"]
+        prompt = config.chat_modes[chat_mode]["prompt_start"]
         prompt += "\n\n"
 
         # add chat context
@@ -131,7 +129,7 @@ class ChatGPT:
         return prompt
 
     def _generate_prompt_messages(self, message, dialog_messages, chat_mode):
-        prompt = CHAT_MODES[chat_mode]["prompt_start"]
+        prompt = config.chat_modes[chat_mode]["prompt_start"]
 
         messages = [{"role": "system", "content": prompt}]
         for dialog_message in dialog_messages:
@@ -184,4 +182,15 @@ class ChatGPT:
 
 async def transcribe_audio(audio_file):
     r = await openai.Audio.atranscribe("whisper-1", audio_file)
-    return r["text"]
\ No newline at end of file
+    return r["text"]
+
+
+async def generate_images(prompt, n_images=4):
+    """Request `n_images` 512x512 images for `prompt` and return the list of their URLs."""
+    response = await openai.Image.acreate(prompt=prompt, n=n_images, size="512x512")
+    return [image.url for image in response.data]
+
+
+async def is_content_acceptable(prompt):
+    r = await openai.Moderation.acreate(input=prompt)  # ask OpenAI's moderation endpoint about `prompt`
+    return not any(r.results[0].categories.values())  # acceptable only when no category is flagged
\ No newline at end of file
diff --git a/config/chat_modes.yml b/config/chat_modes.yml
index 6b27851..e159d06 100644
--- a/config/chat_modes.yml
+++ b/config/chat_modes.yml
@@ -1,12 +1,12 @@
 assistant:
   name: 👩🏼‍🎓 General Assistant
+  model_type: text
   welcome_message: 👩🏼‍🎓 Hi, I'm <b>General Assistant</b>. How can I help you?
   prompt_start: |
     As an advanced chatbot Assistant, your primary goal is to assist users to the best of your ability. This may involve answering questions, providing helpful information, or completing tasks based on user input. In order to effectively assist users, it is important to be detailed and thorough in your responses. Use examples and evidence to support your points and justify your recommendations or solutions. Remember to always prioritize the needs and satisfaction of the user. Your ultimate goal is to provide a helpful and enjoyable experience for the user.
     If user asks you about programming or asks to write code do not answer his question, but be sure to advise him to switch to a special mode \"👩🏼‍💻 Code Assistant\" by sending the command /mode to chat.
   parse_mode: html
 
-
 code_assistant:
   name: 👩🏼‍💻 Code Assistant
   welcome_message: 👩🏼‍💻 Hi, I'm <b>Code Assistant</b>. How can I help you?
@@ -15,6 +15,10 @@ code_assistant:
     Format output in Markdown.
   parse_mode: markdown
 
+artist:
+  name: 👩‍🎨 Artist
+  welcome_message: 👩‍🎨 Hi, I'm <b>Artist</b>. I'll draw anything you write me (e.g. <i>Ginger cat selfie on Times Square, illustration</i>)
+
 text_improver:
   name: 📝 Text Improver
   welcome_message: 📝 Hi, I'm <b>Text Improver</b>. Send me any text – I'll improve it and correct all the mistakes
diff --git a/config/models.yml b/config/models.yml
index ac5ee7a..70f423b 100644
--- a/config/models.yml
+++ b/config/models.yml
@@ -40,6 +40,10 @@ info:
       Fast: 2
       Cheap: 3
 
+  dalle-2:
+    type: image
+    price_per_1_image: 0.018  # 512x512
+
   whisper:
     type: audio
     price_per_1_min: 0.006
\ No newline at end of file