travisvn · DanTulovsky · Aug 10, 2025 · Aug 12, 2025 · Aug 12, 2025 · Aug 12, 2025
diff --git a/Taskfile.yml b/Taskfile.yml
@@ -0,0 +1,51 @@
+version: '3'
+
+# Task runner configuration for building and pushing Docker images
+# Requires Task: https://taskfile.dev
+
+vars:
+  # Docker image repository; defaults to mrwetsnow/openai-edge-tts when IMAGE is unset or empty (override with: IMAGE=yourname/yourrepo task ...)
+  IMAGE:
+    sh: 'echo ${IMAGE:-mrwetsnow/openai-edge-tts}'
+  # UTC date-time tag with minute resolution (YYYY-MM-DD-HHMM), used when neither TAGS nor TAG is provided; lets several pushes per day get distinct tags
+  DATE_TAG:
+    sh: 'date -u +%Y-%m-%d-%H%M'
+  # Space-separated list of all tags to apply: TAGS (space- or comma-separated) takes precedence over TAG; with neither set, fall back to DATE_TAG. 'latest' is always appended.
+  ALL_TAGS:
+    sh: |
+      set -e
+      # Prefer TAGS, then TAG; support comma or space separated values
+      if [ -n "${TAGS:-}" ]; then
+        INPUT_TAGS="${TAGS}"
+      elif [ -n "${TAG:-}" ]; then
+        INPUT_TAGS="${TAG}"
+      else
+        INPUT_TAGS="{{.DATE_TAG}}"
+      fi
+      # Normalize commas to spaces
+      INPUT_TAGS=$(echo "$INPUT_TAGS" | tr ',' ' ')
+      echo "$INPUT_TAGS latest"
+  # Whether to include ffmpeg in the image build; defaults to false and is forwarded to docker build as the INSTALL_FFMPEG build arg
+  INSTALL_FFMPEG:
+    sh: 'echo ${INSTALL_FFMPEG:-false}'
+
+tasks:
+  build-push:  # run as: task build-push (IMAGE, TAGS or TAG, and INSTALL_FFMPEG are read from the environment)
+    desc: Build Docker image and push to Docker Hub
+    env:
+      DOCKER_BUILDKIT: '1'  # build with BuildKit
+    cmds:  # one bash script: build once (--pull, --no-cache) with a -t per tag, then push every tag; -e aborts on the first failure
+      - |
+        bash -ce '
+        tags="{{.ALL_TAGS}}"
+        tag_args=""
+        for t in $tags; do
+          tag_args="$tag_args -t {{.IMAGE}}:$t"
+        done
+        docker build --pull --no-cache $tag_args --build-arg INSTALL_FFMPEG={{.INSTALL_FFMPEG}} .
+        for t in $tags; do
+          docker push {{.IMAGE}}:$t
+        done'
+    silent: false  # do not suppress Task's echo of the command being run
+
+
diff --git a/app/tts_handler.py b/app/tts_handler.py
@@ -46,25 +46,43 @@ async def _generate_audio_stream(text, voice, speed):
     """Generate streaming TTS audio using edge-tts."""
     # Determine if the voice is an OpenAI-compatible voice or a direct edge-tts voice
     edge_tts_voice = voice_mapping.get(voice, voice)  # Use mapping if in OpenAI names, otherwise use as-is
-    
+
     # Convert speed to SSML rate format
     try:
         speed_rate = speed_to_rate(speed)  # Convert speed value to "+X%" or "-X%"
     except Exception as e:
         print(f"Error converting speed: {e}. Defaulting to +0%.")
         speed_rate = "+0%"
-    
+
     # Create the communicator for streaming
     communicator = edge_tts.Communicate(text=text, voice=edge_tts_voice, rate=speed_rate)
-    
+
     # Stream the audio data
     async for chunk in communicator.stream():
         if chunk["type"] == "audio":
             yield chunk["data"]
 
 def generate_speech_stream(text, voice, speed=1.0):
     """Generate streaming speech audio (synchronous wrapper)."""
-    return asyncio.run(_generate_audio_stream(text, voice, speed))
+    # Sync generator: yields each chunk of _generate_audio_stream(text, voice, speed) by stepping it on a private event loop
+    async_generator = _generate_audio_stream(text, voice, speed)
+    loop = asyncio.new_event_loop()
+    try:
+        asyncio.set_event_loop(loop)
+        while True:
+            try:
+                next_chunk = loop.run_until_complete(async_generator.__anext__())  # one loop run per chunk
+            except StopAsyncIteration:
+                break
+            yield next_chunk
+    finally:
+        # Runs on exhaustion, on error, and when the consumer closes this generator early
+        try:
+            loop.run_until_complete(loop.shutdown_asyncgens())
+        except Exception as e:
+            print(f"Error shutting down async generators: {e}")  # best-effort cleanup: report instead of silently swallowing
+        asyncio.set_event_loop(None)  # detach the loop from this thread before closing it
+        loop.close()
 
 async def _generate_audio(text, voice, response_format, speed):
     """Generate TTS audio and optionally convert to a different format."""
@@ -137,7 +155,7 @@ async def _generate_audio(text, voice, response_format, speed):
         Path(converted_path).unlink(missing_ok=True)
         # Clean up the original mp3 file as well, since conversion failed
         Path(temp_mp3_path).unlink(missing_ok=True)
-        
+
         if DETAILED_ERROR_LOGGING:
             error_message = f"FFmpeg error during audio conversion. Command: '{' '.join(e.cmd)}'. Stderr: {e.stderr.decode('utf-8', 'ignore')}"
             print(error_message) # Log for server-side diagnosis
@@ -179,10 +197,10 @@ def get_voices(language=None):
 def speed_to_rate(speed: float) -> str:
     """
     Converts a multiplicative speed value to the edge-tts "rate" format.
-    
+
     Args:
         speed (float): The multiplicative speed value (e.g., 1.5 for +50%, 0.5 for -50%).
-    
+
     Returns:
         str: The formatted "rate" string (e.g., "+50%" or "-50%").
     """