DALLE 2

2026-06-13 03:54:57 +03:00 · 2023-04-21 12:37:06 +03:00
parent 3c8edcb5e0
commit e421be1634
6 changed files with 88 additions and 15 deletions
@@ -8,6 +8,7 @@ import tempfile
 import pydub
 from pathlib import Path
 from datetime import datetime
+import openai

 import telegram
 from telegram import (
@@ -91,6 +92,10 @@ async def register_user_if_not_exists(update: Update, context: CallbackContext,
    if db.get_user_attribute(user.id, "n_transcribed_seconds") is None:
        db.set_user_attribute(user.id, "n_transcribed_seconds", 0.0)

+    # image generation
+    if db.get_user_attribute(user.id, "n_generated_images") is None:
+        db.set_user_attribute(user.id, "n_generated_images", 0)
+

 async def start_handle(update: Update, context: CallbackContext):
    await register_user_if_not_exists(update, context, update.message.from_user)
@@ -142,14 +147,18 @@ async def message_handle(update: Update, context: CallbackContext, message=None,
    if await is_previous_message_not_answered_yet(update, context): return

    user_id = update.message.from_user.id
-    async def message_handle_fn():
-        chat_mode = db.get_user_attribute(user_id, "current_chat_mode")
+    chat_mode = db.get_user_attribute(user_id, "current_chat_mode")

+    if chat_mode == "artist":
+        await generate_image_handle(update, context, message=message)
+        return
+
+    async def message_handle_fn():
        # new dialog timeout
        if use_new_dialog_timeout:
            if (datetime.now() - db.get_user_attribute(user_id, "last_interaction")).seconds > config.new_dialog_timeout and len(db.get_dialog_messages(user_id)) > 0:
                db.start_new_dialog(user_id)
-                await update.message.reply_text(f"Starting new dialog due to timeout (<b>{openai_utils.CHAT_MODES[chat_mode]['name']}</b> mode) ✅", parse_mode=ParseMode.HTML)
+                await update.message.reply_text(f"Starting new dialog due to timeout (<b>{config.chat_modes[chat_mode]['name']}</b> mode) ✅", parse_mode=ParseMode.HTML)
        db.set_user_attribute(user_id, "last_interaction", datetime.now())

        # in case of CancelledError
@@ -164,12 +173,15 @@ async def message_handle(update: Update, context: CallbackContext, message=None,
            await update.message.chat.send_action(action="typing")

            _message = message or update.message.text
+            if _message is None or len(_message) == 0:
+                 await update.message.reply_text("🥲 You sent <b>empty message</b>. Please, try again!", parse_mode=ParseMode.HTML)
+                 return

            dialog_messages = db.get_dialog_messages(user_id, dialog_id=None)
            parse_mode = {
                "html": ParseMode.HTML,
                "markdown": ParseMode.MARKDOWN
-            }[openai_utils.CHAT_MODES[chat_mode]["parse_mode"]]
+            }[config.chat_modes[chat_mode]["parse_mode"]]

            chatgpt_instance = openai_utils.ChatGPT(model=current_model)
            if config.enable_message_streaming:
@@ -289,6 +301,9 @@ async def voice_message_handle(update: Update, context: CallbackContext):
        with open(voice_mp3_path, "rb") as f:
            transcribed_text = await openai_utils.transcribe_audio(f)

+            if transcribed_text is None:
+                 transcribed_text = ""
+
    text = f"🎤: <i>{transcribed_text}</i>"
    await update.message.reply_text(text, parse_mode=ParseMode.HTML)

@@ -298,6 +313,35 @@ async def voice_message_handle(update: Update, context: CallbackContext):
    await message_handle(update, context, message=transcribed_text)


+async def generate_image_handle(update: Update, context: CallbackContext, message=None):
+    """Generate images from a text prompt and send them back as photos.
+
+    Args:
+        update: Incoming Telegram update; ``update.message`` supplies the
+            user, the chat and the fallback prompt text.
+        context: Handler callback context, forwarded to the helper checks.
+        message: Optional prompt; when non-empty it takes precedence over
+            ``update.message.text``.
+
+    Raises:
+        openai.error.InvalidRequestError: Re-raised unless the error text
+            starts with the safety-system rejection message, which is
+            reported to the user instead.
+    """
+    await register_user_if_not_exists(update, context, update.message.from_user)
+    if await is_previous_message_not_answered_yet(update, context): return
+
+    user_id = update.message.from_user.id
+    db.set_user_attribute(user_id, "last_interaction", datetime.now())
+
+    message = message or update.message.text
+
+    # Artist mode is dispatched before message_handle's own empty-message
+    # check, so an empty prompt (e.g. an empty voice transcription) would
+    # otherwise be sent to the image API. Answer the same way instead.
+    if not message:
+        await update.message.reply_text("🥲 You sent <b>empty message</b>. Please, try again!", parse_mode=ParseMode.HTML)
+        return
+
+    await update.message.chat.send_action(action="upload_photo")
+
+    try:
+        image_urls = await openai_utils.generate_images(message, n_images=config.return_n_generated_images)
+    except openai.error.InvalidRequestError as e:
+        if str(e).startswith("Your request was rejected as a result of our safety system"):
+            text = "🥲 Your request <b>doesn't comply</b> with OpenAI's usage policies.\nWhat did you write there, huh?"
+            await update.message.reply_text(text, parse_mode=ParseMode.HTML)
+            return
+        else:
+            raise
+
+    # usage accounting: add the requested number of images to the user's total
+    db.set_user_attribute(user_id, "n_generated_images", config.return_n_generated_images + db.get_user_attribute(user_id, "n_generated_images"))
+
+    for image_url in image_urls:
+        # re-send the chat action before each photo
+        await update.message.chat.send_action(action="upload_photo")
+        await update.message.reply_photo(image_url, parse_mode=ParseMode.HTML)
+
+
 async def new_dialog_handle(update: Update, context: CallbackContext):
    await register_user_if_not_exists(update, context, update.message.from_user)
    if await is_previous_message_not_answered_yet(update, context): return
@@ -309,7 +353,7 @@ async def new_dialog_handle(update: Update, context: CallbackContext):
    await update.message.reply_text("Starting new dialog ✅")

    chat_mode = db.get_user_attribute(user_id, "current_chat_mode")
-    await update.message.reply_text(f"{openai_utils.CHAT_MODES[chat_mode]['welcome_message']}", parse_mode=ParseMode.HTML)
+    await update.message.reply_text(f"{config.chat_modes[chat_mode]['welcome_message']}", parse_mode=ParseMode.HTML)


 async def cancel_handle(update: Update, context: CallbackContext):
@@ -333,7 +377,7 @@ async def show_chat_modes_handle(update: Update, context: CallbackContext):
    db.set_user_attribute(user_id, "last_interaction", datetime.now())

    keyboard = []
-    for chat_mode, chat_mode_dict in openai_utils.CHAT_MODES.items():
+    for chat_mode, chat_mode_dict in config.chat_modes.items():
        keyboard.append([InlineKeyboardButton(chat_mode_dict["name"], callback_data=f"set_chat_mode|{chat_mode}")])
    reply_markup = InlineKeyboardMarkup(keyboard)

@@ -352,7 +396,7 @@ async def set_chat_mode_handle(update: Update, context: CallbackContext):
    db.set_user_attribute(user_id, "current_chat_mode", chat_mode)
    db.start_new_dialog(user_id)

-    await query.edit_message_text(f"{openai_utils.CHAT_MODES[chat_mode]['welcome_message']}", parse_mode=ParseMode.HTML)
+    await query.edit_message_text(f"{config.chat_modes[chat_mode]['welcome_message']}", parse_mode=ParseMode.HTML)


 def get_settings_menu(user_id: int):
@@ -422,6 +466,7 @@ async def show_balance_handle(update: Update, context: CallbackContext):
    total_n_used_tokens = 0

    n_used_tokens_dict = db.get_user_attribute(user_id, "n_used_tokens")
+    n_generated_images = db.get_user_attribute(user_id, "n_generated_images")
    n_transcribed_seconds = db.get_user_attribute(user_id, "n_transcribed_seconds")

    details_text = "🏷️ Details:\n"
@@ -435,12 +480,21 @@ async def show_balance_handle(update: Update, context: CallbackContext):

        details_text += f"- {model_key}: <b>{n_input_spent_dollars + n_output_spent_dollars:.03f}$</b> / <b>{n_input_tokens + n_output_tokens} tokens</b>\n"

+    # image generation
+    image_generation_n_spent_dollars = config.models["info"]["dalle-2"]["price_per_1_image"] * n_generated_images
+    if n_generated_images != 0:
+        details_text += f"- DALL·E 2 (image generation): <b>{image_generation_n_spent_dollars:.03f}$</b> / <b>{n_generated_images} generated images</b>\n"
+
+    total_n_spent_dollars += image_generation_n_spent_dollars
+
+    # voice recognition
    voice_recognition_n_spent_dollars = config.models["info"]["whisper"]["price_per_1_min"] * (n_transcribed_seconds / 60)
    if n_transcribed_seconds != 0:
        details_text += f"- Whisper (voice recognition): <b>{voice_recognition_n_spent_dollars:.03f}$</b> / <b>{n_transcribed_seconds:.01f} seconds</b>\n"

    total_n_spent_dollars += voice_recognition_n_spent_dollars

+
    text = f"You spent <b>{total_n_spent_dollars:.03f}$</b>\n"
    text += f"You used <b>{total_n_used_tokens}</b> tokens\n\n"
    text += details_text
@@ -18,6 +18,7 @@ use_chatgpt_api = config_yaml.get("use_chatgpt_api", True)
 allowed_telegram_usernames = config_yaml["allowed_telegram_usernames"]
 new_dialog_timeout = config_yaml["new_dialog_timeout"]
 enable_message_streaming = config_yaml.get("enable_message_streaming", True)
+return_n_generated_images = config_yaml.get("return_n_generated_images", 1)
 mongodb_uri = f"mongodb://mongo:{config_env['MONGODB_PORT']}"

 # chat_modes
@@ -49,6 +49,7 @@ class Database:

            "n_used_tokens": {},

+            "n_generated_images": 0,
            "n_transcribed_seconds": 0.0  # voice message transcription
        }

@@ -5,8 +5,6 @@ import openai
 openai.api_key = config.openai_api_key


-CHAT_MODES = config.chat_modes
-
 OPENAI_COMPLETION_OPTIONS = {
    "temperature": 0.7,
    "max_tokens": 1000,
@@ -22,7 +20,7 @@ class ChatGPT:
        self.model = model

    async def send_message(self, message, dialog_messages=[], chat_mode="assistant"):
-        if chat_mode not in CHAT_MODES.keys():
+        if chat_mode not in config.chat_modes.keys():
            raise ValueError(f"Chat mode {chat_mode} is not supported")

        n_dialog_messages_before = len(dialog_messages)
@@ -62,7 +60,7 @@ class ChatGPT:
        return answer, (n_input_tokens, n_output_tokens), n_first_dialog_messages_removed

    async def send_message_stream(self, message, dialog_messages=[], chat_mode="assistant"):
-        if chat_mode not in CHAT_MODES.keys():
+        if chat_mode not in config.chat_modes.keys():
            raise ValueError(f"Chat mode {chat_mode} is not supported")

        n_dialog_messages_before = len(dialog_messages)
@@ -114,7 +112,7 @@ class ChatGPT:
        yield "finished", answer, (n_input_tokens, n_output_tokens), n_first_dialog_messages_removed  # sending final answer

    def _generate_prompt(self, message, dialog_messages, chat_mode):
-        prompt = CHAT_MODES[chat_mode]["prompt_start"]
+        prompt = config.chat_modes[chat_mode]["prompt_start"]
        prompt += "\n\n"

        # add chat context
@@ -131,7 +129,7 @@ class ChatGPT:
        return prompt

    def _generate_prompt_messages(self, message, dialog_messages, chat_mode):
-        prompt = CHAT_MODES[chat_mode]["prompt_start"]
+        prompt = config.chat_modes[chat_mode]["prompt_start"]

        messages = [{"role": "system", "content": prompt}]
        for dialog_message in dialog_messages:
@@ -184,4 +182,15 @@ class ChatGPT:

 async def transcribe_audio(audio_file):
    r = await openai.Audio.atranscribe("whisper-1", audio_file)
-    return r["text"]
+    return r["text"]
+
+
+async def generate_images(prompt, n_images=4):
+    """Request ``n_images`` 512x512 images for ``prompt`` and return their URLs as a list."""
+    response = await openai.Image.acreate(prompt=prompt, n=n_images, size="512x512")
+    return [generated.url for generated in response.data]
+
+
+async def is_content_acceptable(prompt):
+    """Return True when the moderation endpoint flags no category for ``prompt``."""
+    r = await openai.Moderation.acreate(input=prompt)
+    # One flagged category is enough to reject. The previous `not all(...)`
+    # only rejected content that tripped every category at once.
+    return not any(r.results[0].categories.values())
@@ -1,12 +1,12 @@
 assistant:
  name: 👩🏼‍🎓 General Assistant
+  model_type: text
  welcome_message: 👩🏼‍🎓 Hi, I'm <b>General Assistant</b>. How can I help you?
  prompt_start: |
    As an advanced chatbot Assistant, your primary goal is to assist users to the best of your ability. This may involve answering questions, providing helpful information, or completing tasks based on user input. In order to effectively assist users, it is important to be detailed and thorough in your responses. Use examples and evidence to support your points and justify your recommendations or solutions. Remember to always prioritize the needs and satisfaction of the user. Your ultimate goal is to provide a helpful and enjoyable experience for the user.
    If user asks you about programming or asks to write code do not answer his question, but be sure to advise him to switch to a special mode \"👩🏼‍💻 Code Assistant\" by sending the command /mode to chat.
  parse_mode: html

-
 code_assistant:
  name: 👩🏼‍💻 Code Assistant
  welcome_message: 👩🏼‍💻 Hi, I'm <b>Code Assistant</b>. How can I help you?
@@ -15,6 +15,10 @@ code_assistant:
    Format output in Markdown.
  parse_mode: markdown

+artist:
+  name: 👩‍🎨 Artist
+  welcome_message: 👩‍🎨 Hi, I'm <b>Artist</b>. I'll draw anything you write me (e.g. <i>Ginger cat selfie on Times Square, illustration</i>)
+
 text_improver:
  name: 📝 Text Improver
  welcome_message: 📝 Hi, I'm <b>Text Improver</b>. Send me any text – I'll improve it and correct all the mistakes
@@ -40,6 +40,10 @@ info:
      Fast: 2
      Cheap: 3

+  dalle-2:
+    type: image
+    price_per_1_image: 0.018  # 512x512
+
  whisper:
    type: audio
    price_per_1_min: 0.006