From be37d889a02bc8106fc4d8e937a723dcaf7fd76b Mon Sep 17 00:00:00 2001 From: "Kristof Ringleff, Fooman" Date: Tue, 1 Oct 2024 14:51:32 +1300 Subject: [PATCH] Grab transcript from youtube matching the user's language instead of the first one --- cli/cli.go | 6 +++++- youtube/youtube.go | 27 +++++++++++++++++++-------- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/cli/cli.go b/cli/cli.go index a6f5c29..1f1feef 100644 --- a/cli/cli.go +++ b/cli/cli.go @@ -130,7 +130,11 @@ func Cli(version string) (message string, err error) { if !currentFlags.YouTubeComments || currentFlags.YouTubeTranscript { var transcript string - if transcript, err = fabric.YouTube.GrabTranscript(videoId); err != nil { + var language = "en" + if currentFlags.Language != "" { + language = currentFlags.Language + } + if transcript, err = fabric.YouTube.GrabTranscript(videoId, language); err != nil { return } diff --git a/youtube/youtube.go b/youtube/youtube.go index 0e935a1..08d8c11 100644 --- a/youtube/youtube.go +++ b/youtube/youtube.go @@ -10,6 +10,7 @@ import ( "google.golang.org/api/option" "google.golang.org/api/youtube/v3" "log" + "net/url" "regexp" "strconv" "strings" @@ -61,17 +62,17 @@ func (o *YouTube) GetVideoId(url string) (ret string, err error) { return } -func (o *YouTube) GrabTranscriptForUrl(url string) (ret string, err error) { +func (o *YouTube) GrabTranscriptForUrl(url string, language string) (ret string, err error) { var videoId string if videoId, err = o.GetVideoId(url); err != nil { return } - return o.GrabTranscript(videoId) + return o.GrabTranscript(videoId, language) } -func (o *YouTube) GrabTranscript(videoId string) (ret string, err error) { +func (o *YouTube) GrabTranscript(videoId string, language string) (ret string, err error) { var transcript string - if transcript, err = o.GrabTranscriptBase(videoId); err != nil { + if transcript, err = o.GrabTranscriptBase(videoId, language); err != nil { err = fmt.Errorf("transcript not available. (%v)", err) return } @@ -89,14 +90,14 @@ func (o *YouTube) GrabTranscript(videoId string) (ret string, err error) { return } -func (o *YouTube) GrabTranscriptBase(videoId string) (ret string, err error) { +func (o *YouTube) GrabTranscriptBase(videoId string, language string) (ret string, err error) { if err = o.initService(); err != nil { return } - url := "https://www.youtube.com/watch?v=" + videoId + watchUrl := "https://www.youtube.com/watch?v=" + videoId var resp string - if resp, err = soup.Get(url); err != nil { + if resp, err = soup.Get(watchUrl); err != nil { return } @@ -117,6 +118,16 @@ func (o *YouTube) GrabTranscriptBase(videoId string) (ret string, err error) { if len(captionTracks) > 0 { transcriptURL := captionTracks[0].BaseURL + for _, captionTrack := range captionTracks { + parsedUrl, error := url.Parse(captionTrack.BaseURL) + if error != nil { + err = fmt.Errorf("error parsing caption track") + } + parsedUrlParams, _ := url.ParseQuery(parsedUrl.RawQuery) + if parsedUrlParams["lang"][0] == language { + transcriptURL = captionTrack.BaseURL + } + } ret, err = soup.Get(transcriptURL) return } @@ -212,7 +223,7 @@ func (o *YouTube) Grab(url string, options *Options) (ret *VideoInfo, err error) } if options.Transcript { - if ret.Transcript, err = o.GrabTranscript(videoId); err != nil { + if ret.Transcript, err = o.GrabTranscript(videoId, "en"); err != nil { return } }