Merge pull request #5 from karfly/mongodb

Move to MongoDB
2026-06-13 03:54:57 +03:00 · 2023-01-19 23:56:58 +03:00
parent e2869135a8 fdb91a8da6
commit 260a3a2afa
13 changed files with 270 additions and 107 deletions
@@ -129,5 +129,7 @@ dmypy.json
 .pyre/

 # Custom
-config.yml
-*.pkl
+config/config.yml
+config/config.env
+
+docker-compose.dev.yml
@@ -25,15 +25,16 @@ This repo is ChatGPT re-created with GPT-3.5 LLM as Telegram Bot. **And it works

 2. Get your Telegram bot token from [@BotFather](https://t.me/BotFather)

-3. Edit `config.example.yml` to add your tokens and raname it to `config.yml`:
+3. Edit `config/config.example.yml` to set your tokens and run the 2 commands below (*if you're an advanced user, you can also edit* `config/config.example.env`):
 ```bash
-mv config.example.yml config.yml
+mv config/config.example.yml config/config.yml
+mv config/config.example.env config/config.env
 ```

 🔥 And now **run**:

 ```bash
-docker compose up --build
+docker compose --env-file config/config.env up --build
 ```

 ## References
@@ -3,7 +3,7 @@ import logging
 import traceback
 import html
 import json
-import time
+from datetime import datetime

 from telegram import Update, InlineKeyboardButton, InlineKeyboardMarkup
 from telegram.ext import (
@@ -11,18 +11,18 @@ from telegram.ext import (
    CallbackContext,
    CommandHandler,
    MessageHandler,
-    PicklePersistence,
    CallbackQueryHandler,
    filters
 )
 from telegram.constants import ParseMode, ChatAction

-import chatgpt
-import utils
 import config
+import database
+import chatgpt


 # setup
+db = database.Database()
 logger = logging.getLogger(__name__)

 HELP_MESSAGE = """Commands:
@@ -35,9 +35,21 @@ HELP_MESSAGE = """Commands:


 async def start_handle(update: Update, context: CallbackContext):
-    utils.init_user(update, context)
-    context.user_data["last_interation_timestamp"] = time.time()
+    user = update.message.from_user
+    user_id = user.id

+    if not db.check_if_user_exists(user.id):
+        db.add_new_user(
+            user_id,
+            update.message.chat_id,
+            username=user.username,
+            first_name=user.first_name,
+            last_name= user.last_name
+        )
+    
+    db.set_user_attribute(user_id, "last_interaction", datetime.now())
+    db.start_new_dialog(user_id)
+    
    reply_text = "Hi! I'm <b>ChatGPT</b> bot implemented with GPT-3.5 OpenAI API 🤖\n\n"
    reply_text += HELP_MESSAGE

@@ -47,77 +59,93 @@ async def start_handle(update: Update, context: CallbackContext):


 async def help_handle(update: Update, context: CallbackContext):
-    utils.init_user(update, context)
-    context.user_data["last_interation_timestamp"] = time.time()
+    """Record the user's interaction time and reply with HELP_MESSAGE (HTML parse mode)."""
+    user_id = update.message.from_user.id
+    # NOTE(review): set_user_attribute raises ValueError for a user that was never added;
+    # the removed utils.init_user call used to create defaults here — confirm /start always runs first.
+    db.set_user_attribute(user_id, "last_interaction", datetime.now())
     await update.message.reply_text(HELP_MESSAGE, parse_mode=ParseMode.HTML)


 async def retry_handle(update: Update, context: CallbackContext):
-    context.user_data["last_interation_timestamp"] = time.time()
+    """Remove the last exchange from the user's current dialog and re-send its user message.
+
+    Replies with a notice and returns when the current dialog has no messages.
+    """
+    user_id = update.message.from_user.id
+    db.set_user_attribute(user_id, "last_interaction", datetime.now())

-    if len(context.user_data["chat_context"]) == 0:
+    # dialog_id=None selects the user's current dialog
+    dialog_messages = db.get_dialog_messages(user_id, dialog_id=None)
+    if len(dialog_messages) == 0:
         await update.message.reply_text("No message to retry 🤷‍♂️")
         return

-    last_chat_context_item = context.user_data["chat_context"].pop()
-    await message_handle(update, context, message=last_chat_context_item["user"], use_new_dialog_timeout=False)
+    last_dialog_message = dialog_messages.pop()
+    db.set_dialog_messages(user_id, dialog_messages, dialog_id=None)  # last message was removed from the context
+
+    # use_new_dialog_timeout=False: message_handle skips its inactivity check for the retried message
+    await message_handle(update, context, message=last_dialog_message["user"], use_new_dialog_timeout=False)


 async def message_handle(update: Update, context: CallbackContext, message=None, use_new_dialog_timeout=True):
-    utils.init_user(update, context)
+    user_id = update.message.from_user.id

    # new dialog timeout
    if use_new_dialog_timeout:
-        if time.time() - context.user_data["last_interation_timestamp"] > config.new_dialog_timeout:
-            context.user_data["chat_context"] = []
+        if (datetime.now() - db.get_user_attribute(user_id, "last_interaction")).seconds > config.new_dialog_timeout:
+            db.start_new_dialog(user_id)
            await update.message.reply_text("Starting new dialog due to timeout ✅")
-    context.user_data["last_interation_timestamp"] = time.time()
+    db.set_user_attribute(user_id, "last_interaction", datetime.now())

    # send typing action
    await update.message.chat.send_action(action="typing")

    try:
        message = message or update.message.text
-        answer, prompt, chat_context, n_used_tokens, n_first_chat_context_messages_removed = chatgpt.ChatGPT().send_message(
+
+        answer, prompt, n_used_tokens, n_first_dialog_messages_removed = chatgpt.ChatGPT().send_message(
            message,
-            chat_context=context.user_data["chat_context"],
-            chat_mode=context.user_data["chat_mode"]
+            dialog_messages=db.get_dialog_messages(user_id, dialog_id=None),
+            chat_mode=db.get_user_attribute(user_id, "current_chat_mode"),
        )
+
+        # update user data
+        new_dialog_message = {"user": message, "bot": answer, "date": datetime.now()}
+        db.set_dialog_messages(
+            user_id,
+            db.get_dialog_messages(user_id, dialog_id=None) + [new_dialog_message],
+            dialog_id=None
+        )
+
+        db.set_user_attribute(user_id, "n_used_tokens", n_used_tokens + db.get_user_attribute(user_id, "n_used_tokens"))
+
    except Exception as e:
        error_text = f"Something went wrong during completion. Reason: {e}"
        logger.error(error_text)
        await update.message.reply_text(error_text)
        return

-    # update user data
-    context.user_data["chat_context"] = chat_context
-    context.user_data["total_n_used_tokens"] += n_used_tokens
-
    # send message if some messages were removed from the context
-    if n_first_chat_context_messages_removed > 0:
-        if n_first_chat_context_messages_removed == 1:
+    if n_first_dialog_messages_removed > 0:
+        if n_first_dialog_messages_removed == 1:
            text = "✍️ <i>Note:</i> Your current dialog is too long, so your <b>first message</b> was removed from the context.\n Send /new command to start new dialog"
        else:
-            text = f"✍️ <i>Note:</i> Your current dialog is too long, so <b>{n_first_chat_context_messages_removed} first messages</b> were removed from the context.\n Send /new command to start new dialog"
+            text = f"✍️ <i>Note:</i> Your current dialog is too long, so <b>{n_first_dialog_messages_removed} first messages</b> were removed from the context.\n Send /new command to start new dialog"
        await update.message.reply_text(text, parse_mode=ParseMode.HTML)

-    await update.message.reply_text(answer, parse_mode=ParseMode.HTML)
+    try:
+        await update.message.reply_text(answer, parse_mode=ParseMode.HTML)
+    except telegram.error.BadRequest:
+        # answer has invalid characters, so we send it without parse_mode
+        await update.message.reply_text(answer)


 async def new_dialog_handle(update: Update, context: CallbackContext):
-    utils.init_user(update, context)
-    context.user_data["last_interation_timestamp"] = time.time()
+    """Start a fresh dialog for the user and send the current chat mode's welcome message."""
+    user_id = update.message.from_user.id
+    db.set_user_attribute(user_id, "last_interaction", datetime.now())

-    context.user_data["chat_context"] = []
+    # creates a new empty dialog and makes it the user's current one
+    db.start_new_dialog(user_id)
     await update.message.reply_text("Starting new dialog ✅")

-    chat_mode = context.user_data["chat_mode"]
+    chat_mode = db.get_user_attribute(user_id, "current_chat_mode")
     await update.message.reply_text(f"{chatgpt.CHAT_MODES[chat_mode]['welcome_message']}", parse_mode=ParseMode.HTML)


 async def show_chat_modes_handle(update: Update, context: CallbackContext):
-    utils.init_user(update, context)
-    context.user_data["last_interation_timestamp"] = time.time()
+    user_id = update.message.from_user.id
+    db.set_user_attribute(user_id, "last_interaction", datetime.now())

    keyboard = []
    for chat_mode, chat_mode_dict in chatgpt.CHAT_MODES.items():
@@ -128,13 +156,15 @@ async def show_chat_modes_handle(update: Update, context: CallbackContext):


 async def set_chat_mode_handle(update: Update, context: CallbackContext):
+    user_id = update.callback_query.from_user.id
+
    query = update.callback_query
    await query.answer()

    chat_mode = query.data.split("|")[1]

-    context.user_data["chat_mode"] = chat_mode
-    context.user_data["chat_context"] = []
+    db.set_user_attribute(user_id, "current_chat_mode", chat_mode)
+    db.start_new_dialog(user_id)

    await query.edit_message_text(
        f"<b>{chatgpt.CHAT_MODES[chat_mode]['name']}</b> chat mode is set",
@@ -145,14 +175,14 @@ async def set_chat_mode_handle(update: Update, context: CallbackContext):


 async def show_balance_handle(update: Update, context: CallbackContext):
-    utils.init_user(update, context)
-    context.user_data["last_interation_timestamp"] = time.time()
+    """Reply with the user's accumulated token count and the cost derived from it."""
+    user_id = update.message.from_user.id
+    db.set_user_attribute(user_id, "last_interaction", datetime.now())

-    total_n_used_tokens = context.user_data['total_n_used_tokens']
-    total_spent_dollars = total_n_used_tokens * (0.01 / 1000)
+    n_used_tokens = db.get_user_attribute(user_id, "n_used_tokens")
+    # hard-coded price: 0.01$ per 1000 tokens (the same figure is shown in the reply text)
+    n_spent_dollars = n_used_tokens * (0.01 / 1000)

-    text = f"You spent <b>{total_spent_dollars:.03f}$</b>\n"
-    text += f"You used <b>{total_n_used_tokens}</b> tokens <i>(price: 0.01$ per 1000 tokens)</i>\n"
+    text = f"You spent <b>{n_spent_dollars:.03f}$</b>\n"
+    text += f"You used <b>{n_used_tokens}</b> tokens <i>(price: 0.01$ per 1000 tokens)</i>\n"

     await update.message.reply_text(text, parse_mode=ParseMode.HTML)

@@ -175,12 +205,9 @@ async def error_handler(update: Update, context: CallbackContext) -> None:


 def run_bot() -> None:
-    persistence = PicklePersistence(filepath=config.persistence_path)
-
    application = (
        ApplicationBuilder()
        .token(config.telegram_token)
-        .persistence(persistence)
        .build()
    )

@@ -8,7 +8,7 @@ CHAT_MODES = {
    "assistant": {
        "name": "👩🏼‍🎓 Assistant",
        "welcome_message": "👩🏼‍🎓 Hi, I'm <b>ChatGPT assistant</b>. How can I help you?",
-        "prompt_start": "As an advanced chatbot named ChatGPT, your primary goal is to assist users to the best of your ability. This may involve answering questions, providing helpful information, or completing tasks based on user input. In order to effectively assist users, it is important to be detailed and thorough in your responses. Use examples and evidence to support your points and justify your recommendations or solutions. Remember to always prioritize the needs and satisfaction of the user. Your ultimate goal is to provide a helpful and enjoyable experience for the user. If you have code in your reply, write it inside <code>, </code> tags."
+        "prompt_start": "As an advanced chatbot named ChatGPT, your primary goal is to assist users to the best of your ability. This may involve answering questions, providing helpful information, or completing tasks based on user input. In order to effectively assist users, it is important to be detailed and thorough in your responses. Use examples and evidence to support your points and justify your recommendations or solutions. Remember to always prioritize the needs and satisfaction of the user. Your ultimate goal is to provide a helpful and enjoyable experience for the user."
    },

    "code_assistant": {
@@ -29,14 +29,14 @@ class ChatGPT:
    def __init__(self):
        pass
    
-    def send_message(self, message, chat_context=[], chat_mode="assistant"):
+    def send_message(self, message, dialog_messages=[], chat_mode="assistant"):
        if chat_mode not in CHAT_MODES.keys():
            raise ValueError(f"Chat mode {chat_mode} is not supported")

-        chat_context_len_before = len(chat_context)
+        n_dialog_messages_before = len(dialog_messages)
        answer = None
        while answer is None:
-            prompt = self._generate_prompt(message, chat_context, chat_mode)
+            prompt = self._generate_prompt(message, dialog_messages, chat_mode)
            try:
                r = openai.Completion.create(
                    engine="text-davinci-003",
@@ -48,37 +48,38 @@ class ChatGPT:
                    presence_penalty=0,
                )
                answer = r.choices[0].text
-                answer = answer.strip()
+                answer = self._postprocess_answer(answer)

                n_used_tokens = r.usage.total_tokens

            except openai.error.InvalidRequestError as e:  # too many tokens
-                if len(chat_context) == 0:
-                    raise ValueError("chat_context is reduced to zero, but still has too many tokens to make completion") from e
+                if len(dialog_messages) == 0:
+                    raise ValueError("Dialog messages is reduced to zero, but still has too many tokens to make completion") from e

-                # forget first message in chat_context
-                chat_context = chat_context[1:]
+                # forget first message in dialog_messages
+                dialog_messages = dialog_messages[1:]

-        n_first_chat_context_messages_removed = chat_context_len_before - len(chat_context)
+        n_first_dialog_messages_removed = n_dialog_messages_before - len(dialog_messages)

-        # update chat_context
-        chat_context.append({"user": message, "chatgpt": answer})
+        return answer, prompt, n_used_tokens, n_first_dialog_messages_removed

-        return answer, prompt, chat_context, n_used_tokens, n_first_chat_context_messages_removed
-
-    def _generate_prompt(self, message, chat_context, chat_mode):
+    def _generate_prompt(self, message, dialog_messages, chat_mode):
+        """Build the completion prompt for `message`.
+
+        The prompt is the chat mode's `prompt_start`, then (if any) the earlier
+        dialog turns as "User: ..." / "ChatGPT: ..." lines under a "Chat:" header,
+        then the new user message, ending with an open "ChatGPT: " line.
+        """
         prompt = CHAT_MODES[chat_mode]["prompt_start"]
         prompt += "\n\n"

         # add chat context
-        if len(chat_context) > 0:
+        if len(dialog_messages) > 0:
             prompt += "Chat:\n"
-            for chat_context_item in chat_context:
-                prompt += f"User: {chat_context_item['user']}\n"
-                prompt += f"ChatGPT: {chat_context_item['chatgpt']}\n"
+            for dialog_message in dialog_messages:
+                prompt += f"User: {dialog_message['user']}\n"
+                prompt += f"ChatGPT: {dialog_message['bot']}\n"

         # current message
         prompt += f"User: {message}\n"
         prompt += "ChatGPT: "

         return prompt
+
+    def _postprocess_answer(self, answer):
+        answer = answer.strip()
+        return answer
@@ -0,0 +1,19 @@
+import yaml
+import dotenv
+from pathlib import Path
+
+config_dir = Path(__file__).parent.parent.resolve() / "config"
+
+# load yaml config
+with open(config_dir / "config.yml", 'r') as f:
+    config_yaml = yaml.safe_load(f)
+
+# load .env config
+config_env = dotenv.dotenv_values(config_dir / "config.env")
+
+# config parameters
+telegram_token = config_yaml["telegram_token"]
+openai_api_key = config_yaml["openai_api_key"]
+allowed_telegram_usernames = config_yaml["allowed_telegram_usernames"]
+new_dialog_timeout = config_yaml["new_dialog_timeout"]
+mongodb_uri = f"mongodb://mongo:{config_env['MONGODB_PORT']}"
@@ -0,0 +1,111 @@
+from typing import Optional, Any
+
+import pymongo
+import uuid
+from datetime import datetime
+
+import config
+
+
+class Database:
+    def __init__(self):
+        self.client = pymongo.MongoClient(config.mongodb_uri)
+        self.db = self.client["chatgpt_telegram_bot"]
+
+        self.user_collection = self.db["user"]
+        self.dialog_collection = self.db["dialog"]
+
+    def check_if_user_exists(self, user_id: int, raise_exception: bool = False):
+        if self.user_collection.count_documents({"_id": user_id}) > 0:
+            return True
+        else:
+            if raise_exception:
+                raise ValueError(f"User {user_id} does not exist")
+            else:
+                return False
+        
+    def add_new_user(
+        self,
+        user_id: int,
+        chat_id: int,
+        username: str = "",
+        first_name: str = "",
+        last_name: str = "",
+    ):
+        user_dict = {
+            "_id": user_id,
+            "chat_id": chat_id,
+
+            "username": username,
+            "first_name": first_name,
+            "last_name": last_name,
+
+            "last_interaction": datetime.now(),
+            "first_seen": datetime.now(),
+            
+            "current_dialog_id": None,
+            "current_chat_mode": "assistant",
+
+            "n_used_tokens": 0
+        }
+
+        if not self.check_if_user_exists(user_id):
+            self.user_collection.insert_one(user_dict)
+            
+        # TODO: maybe start a new dialog here?
+
+    def start_new_dialog(self, user_id: int):
+        self.check_if_user_exists(user_id, raise_exception=True)
+
+        dialog_id = str(uuid.uuid4())
+        dialog_dict = {
+            "_id": dialog_id,
+            "user_id": user_id,
+            "chat_mode": self.get_user_attribute(user_id, "current_chat_mode"),
+            "start_time": datetime.now(),
+            "messages": []
+        }
+
+        # add new dialog
+        self.dialog_collection.insert_one(dialog_dict)
+
+        # update user's current dialog
+        self.user_collection.update_one(
+            {"_id": user_id},
+            {"$set": {"current_dialog_id": dialog_id}}
+        )
+
+        return dialog_id
+
+    def get_user_attribute(self, user_id: int, key: str):
+        self.check_if_user_exists(user_id, raise_exception=True)
+        user_dict = self.user_collection.find_one({"_id": user_id})
+
+        if key not in user_dict:
+            raise ValueError(f"User {user_id} does not have a value for {key}")
+
+        return user_dict[key]
+
+    def set_user_attribute(self, user_id: int, key: str, value: Any):
+        self.check_if_user_exists(user_id, raise_exception=True)
+        self.user_collection.update_one({"_id": user_id}, {"$set": {key: value}})
+
+    def get_dialog_messages(self, user_id: int, dialog_id: Optional[str] = None):
+        self.check_if_user_exists(user_id, raise_exception=True)
+
+        if dialog_id is None:
+            dialog_id = self.get_user_attribute(user_id, "current_dialog_id")
+
+        dialog_dict = self.dialog_collection.find_one({"_id": dialog_id, "user_id": user_id})               
+        return dialog_dict["messages"]
+
+    def set_dialog_messages(self, user_id: int, dialog_messages: list, dialog_id: Optional[str] = None):
+        self.check_if_user_exists(user_id, raise_exception=True)
+
+        if dialog_id is None:
+            dialog_id = self.get_user_attribute(user_id, "current_dialog_id")
+        
+        self.dialog_collection.update_one(
+            {"_id": dialog_id, "user_id": user_id},
+            {"$set": {"messages": dialog_messages}}
+        )
@@ -1,11 +0,0 @@
-import yaml
-
-
-with open("config.yml", 'r') as f:
-    config = yaml.safe_load(f)
-
-telegram_token = config["telegram_token"]
-openai_api_key = config["openai_api_key"]
-allowed_telegram_usernames = config["allowed_telegram_usernames"]
-persistence_path = config["persistence_path"]
-new_dialog_timeout = config["new_dialog_timeout"]
@@ -0,0 +1,6 @@
+MONGODB_PATH=./mongodb  # local path where to store MongoDB
+MONGODB_PORT=27017  # MongoDB port
+
+MONGO_EXPRESS_PORT=8081  # Mongo Express port
+MONGO_EXPRESS_USERNAME=username  # Mongo Express username
+MONGO_EXPRESS_PASSWORD=password  # Mongo Express password
@@ -1,5 +1,4 @@
 telegram_token: ""
 openai_api_key: ""
 allowed_telegram_usernames: []  # if empty, the bot is available to anyone
-persistence_path: "./persistence.pkl"  # path where to store user data
 new_dialog_timeout: 600  # new dialog starts after timeout (in seconds)
@@ -1,10 +1,38 @@
 version: "3"

 services:
-  llm_telegram_bot:
+  mongo:
+    container_name: mongo
+    image: mongo:latest
+    restart: always
+    # make mongod listen on the configured port, so the port mapping below and the
+    # in-network clients (which connect to mongo:${MONGODB_PORT}) agree when it is not 27017
+    command: mongod --port ${MONGODB_PORT:-27017}
+    ports:
+      - ${MONGODB_PORT:-27017}:${MONGODB_PORT:-27017}
+    volumes:
+      - ${MONGODB_PATH:-./mongodb}:/data/db
+    # TODO: add auth
+
+  chatgpt_telegram_bot:
    container_name: chatgpt_telegram_bot
-    command: python3 bot.py
+    command: python3 bot/bot.py
    restart: always
    build:
      context: "."
-      dockerfile: Dockerfile    
+      dockerfile: Dockerfile
+    depends_on:
+      - mongo
+
+  mongo_express:
+    container_name: mongo-express
+    image: mongo-express:latest
+    restart: always
+    ports:
+      # only the host port is configurable; mongo-express listens on 8081 inside the container by default
+      - ${MONGO_EXPRESS_PORT:-8081}:8081
+    environment:
+      - ME_CONFIG_MONGODB_SERVER=mongo
+      - ME_CONFIG_MONGODB_PORT=${MONGODB_PORT:-27017}
+      - ME_CONFIG_MONGODB_ENABLE_ADMIN=false
+      - ME_CONFIG_MONGODB_AUTH_DATABASE=chatgpt_telegram_bot
+      - ME_CONFIG_BASICAUTH_USERNAME=${MONGO_EXPRESS_USERNAME:-username}
+      - ME_CONFIG_BASICAUTH_PASSWORD=${MONGO_EXPRESS_PASSWORD:-password}
+    depends_on:
+      - mongo
@@ -1,3 +1,5 @@
 python-telegram-bot==20.0a0
-openai
-PyYAML
+openai>=0.26.1
+PyYAML==6.0
+pymongo==4.3.3
+python-dotenv==0.21.0
@@ -1,22 +0,0 @@
-import time
-
-from telegram.ext import CallbackContext
-from telegram import Update
-
-from chatgpt import ChatGPT
-import config
-
-
-def init_user(update: Update, context: CallbackContext):
-    # init chatgpt
-    if "chat_context" not in context.user_data:
-        context.user_data["chat_context"] = []
-
-    if "chat_mode" not in context.user_data:
-        context.user_data["chat_mode"] = "assistant"
-
-    if "total_n_used_tokens" not in context.user_data:
-        context.user_data["total_n_used_tokens"] = 0
-
-    if "last_interation_timestamp" not in context.user_data:
-        context.user_data["last_interation_timestamp"] = time.time()