Keep downloaded voice messages in memory instead of temp files, and transcribe them without converting to MP3

2026-06-13 03:54:57 +03:00 · 2023-11-02 13:52:35 +00:00
parent bab5938241
commit ce0c34825f
3 changed files with 12 additions and 26 deletions
@@ -1,12 +1,9 @@
-import os
+import io
 import logging
 import asyncio
 import traceback
 import html
 import json
-import tempfile
-import pydub
-from pathlib import Path
 from datetime import datetime
 import openai

@@ -342,25 +339,15 @@ async def voice_message_handle(update: Update, context: CallbackContext):
    db.set_user_attribute(user_id, "last_interaction", datetime.now())

    voice = update.message.voice
-    with tempfile.TemporaryDirectory() as tmp_dir:
-        tmp_dir = Path(tmp_dir)
-        voice_ogg_path = tmp_dir / "voice.ogg"
+    voice_file = await context.bot.get_file(voice.file_id)
    
-        # download
-        voice_file = await context.bot.get_file(voice.file_id)
-        await voice_file.download_to_drive(voice_ogg_path)
-
-        # convert to mp3
-        voice_mp3_path = tmp_dir / "voice.mp3"
-        pydub.AudioSegment.from_file(voice_ogg_path).export(voice_mp3_path, format="mp3")
-
-        # transcribe
-        with open(voice_mp3_path, "rb") as f:
-            transcribed_text = await openai_utils.transcribe_audio(f)
-
-            if transcribed_text is None:
-                 transcribed_text = ""
+    # store file in memory, not on disk
+    buf = io.BytesIO()
+    await voice_file.download_to_memory(buf)
+    buf.name = "voice.oga"  # file extension is required
+    buf.seek(0)  # move cursor to the beginning of the buffer

+    transcribed_text = await openai_utils.transcribe_audio(buf)
    text = f"🎤: <i>{transcribed_text}</i>"
    await update.message.reply_text(text, parse_mode=ParseMode.HTML)

@@ -189,9 +189,9 @@ class ChatGPT:
        return n_input_tokens, n_output_tokens


-async def transcribe_audio(audio_file):
+async def transcribe_audio(audio_file) -> str:
    r = await openai.Audio.atranscribe("whisper-1", audio_file)
-    return r["text"]
+    return r["text"] or ""


 async def generate_images(prompt, n_images=4, size="512x512"):
@@ -4,4 +4,3 @@ tiktoken>=0.3.0
 PyYAML==6.0
 pymongo==4.3.3
 python-dotenv==0.21.0
-pydub==0.25.1