Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cmd/generate_changelog/incoming/1810.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
### PR [#1810](https://github.com/danielmiessler/Fabric/pull/1810) by [tonymet](https://github.com/tonymet): improve subtitle lang, retry, debugging & error handling

- Improve subtitle lang, retry, debugging & error handling
2 changes: 0 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@ github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kk
github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8=
github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4=
github.com/anthropics/anthropic-sdk-go v1.12.0 h1:xPqlGnq7rWrTiHazIvCiumA0u7mGQnwDQtvA1M82h9U=
github.com/anthropics/anthropic-sdk-go v1.12.0/go.mod h1:WTz31rIUHUHqai2UslPpw5CwXrQP3geYBioRV4WOLvE=
github.com/anthropics/anthropic-sdk-go v1.13.0 h1:Bhbe8sRoDPtipttg8bQYrMCKe2b79+q6rFW1vOKEUKI=
github.com/anthropics/anthropic-sdk-go v1.13.0/go.mod h1:WTz31rIUHUHqai2UslPpw5CwXrQP3geYBioRV4WOLvE=
github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de h1:FxWPpzIjnTlhPwqqXc4/vE0f7GvRjuAsbW+HOIe8KnA=
Expand Down
117 changes: 60 additions & 57 deletions internal/tools/youtube/youtube.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@
package youtube

import (
"bufio"
"bytes"
"context"
"encoding/csv"
"flag"
"fmt"
"io"
"log"
"os"
"os/exec"
Expand All @@ -26,6 +28,8 @@ import (

"github.com/danielmiessler/fabric/internal/plugins"
"github.com/kballard/go-shellquote"

debuglog "github.com/danielmiessler/fabric/internal/log"
"google.golang.org/api/option"
"google.golang.org/api/youtube/v3"
)
Expand Down Expand Up @@ -143,6 +147,46 @@ func (o *YouTube) GrabTranscriptWithTimestampsWithArgs(videoId string, language
return o.tryMethodYtDlpWithTimestamps(videoId, language, additionalArgs)
}

// detectError scans yt-dlp output line by line and converts known failure
// signatures into actionable errors.
//
// Each line is passed to debuglog.Debug with the debuglog.Trace level. The
// first line containing one of the known markers stops the scan and the
// message associated with that marker is returned as the error. Markers are
// checked in a fixed order, so a line that matches several of them always
// produces the same message.
//
// It returns nil when no marker is found, and a wrapped read error when the
// scanner itself fails.
func detectError(ytOutput io.Reader) error {
	const (
		rateLimitMsg = "YouTube rate limit exceeded. Try again later or use different yt-dlp arguments like '--sleep-requests 1' to slow down requests."
		botCheckMsg  = "YouTube requires authentication (bot detection). Use --yt-dlp-args='--cookies-from-browser BROWSER' where BROWSER is chrome, firefox, brave, etc."
	)

	// Built once, outside the scan loop. A slice rather than a map keeps the
	// match order deterministic.
	knownErrors := []struct {
		marker  string
		message string
	}{
		{"429", rateLimitMsg},
		{"Too Many Requests", rateLimitMsg},
		{"Sign in to confirm you're not a bot", botCheckMsg},
		{"Use --cookies-from-browser", botCheckMsg},
	}

	scanner := bufio.NewScanner(ytOutput)
	for scanner.Scan() {
		curLine := scanner.Text()
		debuglog.Debug(debuglog.Trace, "%s\n", curLine)

		for _, known := range knownErrors {
			if strings.Contains(curLine, known.marker) {
				return fmt.Errorf("%s", known.message)
			}
		}
	}

	// Keep the underlying cause (e.g. bufio.ErrTooLong) available to callers
	// via errors.Is / errors.As instead of discarding it.
	if err := scanner.Err(); err != nil {
		return fmt.Errorf("reading yt-dlp output: %w", err)
	}
	return nil
}

// noLangs returns args with the first "--sub-langs" option removed, so that a
// retry can let yt-dlp choose the subtitle language itself.
//
// Both spellings of the option are handled:
//
//	--sub-langs VALUE   (flag and its value are two elements; both are dropped)
//	--sub-langs=VALUE   (a single element; only that element is dropped)
//
// A dangling "--sub-langs" in the last position, with no value after it, is
// dropped on its own. Only the first occurrence is removed. When the option is
// not present, args is returned unchanged.
//
// The input slice is never modified: when something is removed, the result is
// a freshly allocated slice.
func noLangs(args []string) []string {
	const flag = "--sub-langs"

	for i, arg := range args {
		var drop int
		switch {
		case arg == flag:
			drop = 2
			if i == len(args)-1 {
				// No value follows the flag; avoid slicing past the end.
				drop = 1
			}
		case strings.HasPrefix(arg, flag+"="):
			drop = 1
		default:
			continue
		}

		// Copy into a new backing array so the caller's slice is not
		// overwritten by the shift.
		trimmed := make([]string, 0, len(args)-drop)
		trimmed = append(trimmed, args[:i]...)
		return append(trimmed, args[i+drop:]...)
	}
	return args
}

// tryMethodYtDlpInternal is a helper function to reduce duplication between
// tryMethodYtDlp and tryMethodYtDlpWithTimestamps.
func (o *YouTube) tryMethodYtDlpInternal(videoId string, language string, additionalArgs string, processVTTFileFunc func(filename string) (string, error)) (ret string, err error) {
Expand All @@ -168,20 +212,18 @@ func (o *YouTube) tryMethodYtDlpInternal(videoId string, language string, additi
"--write-auto-subs",
"--skip-download",
"--sub-format", "vtt",
"--quiet",
"--no-warnings",
"-o", outputPath,
}

args := append([]string{}, baseArgs...)

// Add built-in language selection first
if language != "" {
langMatch := language
if len(langMatch) > 2 {
langMatch = langMatch[:2]
langMatch := language[:2]
langOpts := language + "," + langMatch + ".*"
if langMatch != language {
langOpts += "," + langMatch
}
langOpts := language + "," + langMatch + ".*," + langMatch
args = append(args, "--sub-langs", langOpts)
}

Expand All @@ -196,65 +238,26 @@ func (o *YouTube) tryMethodYtDlpInternal(videoId string, language string, additi

args = append(args, videoURL)

cmd := exec.Command("yt-dlp", args...)

var stderr bytes.Buffer
cmd.Stderr = &stderr

if err = cmd.Run(); err != nil {
stderrStr := stderr.String()

// Check for specific YouTube errors
if strings.Contains(stderrStr, "429") || strings.Contains(stderrStr, "Too Many Requests") {
err = fmt.Errorf("YouTube rate limit exceeded. Try again later or use different yt-dlp arguments like '--sleep-requests 1' to slow down requests. Error: %v", err)
return
}

if strings.Contains(stderrStr, "Sign in to confirm you're not a bot") || strings.Contains(stderrStr, "Use --cookies-from-browser") {
err = fmt.Errorf("YouTube requires authentication (bot detection). Use --yt-dlp-args='--cookies-from-browser BROWSER' where BROWSER is chrome, firefox, brave, etc. Error: %v", err)
return
}

if language != "" {
// Fallback: try without specifying language (let yt-dlp choose best available)
stderr.Reset()
fallbackArgs := append([]string{}, baseArgs...)

// Add additional arguments if provided
if additionalArgs != "" {
additionalArgsList, parseErr := shellquote.Split(additionalArgs)
if parseErr != nil {
return "", fmt.Errorf("invalid yt-dlp arguments: %v", parseErr)
}
fallbackArgs = append(fallbackArgs, additionalArgsList...)
}

// Don't specify language, let yt-dlp choose
fallbackArgs = append(fallbackArgs, videoURL)
cmd = exec.Command("yt-dlp", fallbackArgs...)
cmd.Stderr = &stderr
if err = cmd.Run(); err != nil {
stderrStr2 := stderr.String()
if strings.Contains(stderrStr2, "429") || strings.Contains(stderrStr2, "Too Many Requests") {
err = fmt.Errorf("YouTube rate limit exceeded. Try again later or use different yt-dlp arguments like '--sleep-requests 1'. Error: %v", err)
} else {
err = fmt.Errorf("yt-dlp failed with language '%s' and fallback. Original error: %s. Fallback error: %s", language, stderrStr, stderrStr2)
}
return
}
} else {
err = fmt.Errorf("yt-dlp failed: %v, stderr: %s", err, stderrStr)
return
for retry := 1; retry >= 0; retry-- {
var ytOutput []byte
cmd := exec.Command("yt-dlp", args...)
debuglog.Debug(debuglog.Trace, "yt-dlp %+v\n", cmd.Args)
ytOutput, err = cmd.CombinedOutput()
ytReader := bytes.NewReader(ytOutput)
if err = detectError(ytReader); err == nil {
break
}
args = noLangs(args)
}
if err != nil {
return
}

// Find VTT files using cross-platform approach
// Try to find files with the requested language first, but fall back to any VTT file
vttFiles, err := o.findVTTFilesWithFallback(tempDir, language)
if err != nil {
return "", err
}

return processVTTFileFunc(vttFiles[0])
}

Expand Down