Merge pull request #1011 from fooman/grab-language-specific-transcription-from-youtube

Grab transcript from youtube matching the user's language
This commit is contained in:
Eugen Eisler 2024-10-01 17:10:29 +02:00 committed by GitHub
commit 5d3e0dae49
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 24 additions and 9 deletions

View File

@ -130,7 +130,11 @@ func Cli(version string) (message string, err error) {
if !currentFlags.YouTubeComments || currentFlags.YouTubeTranscript {
var transcript string
if transcript, err = fabric.YouTube.GrabTranscript(videoId); err != nil {
var language = "en"
if currentFlags.Language != "" {
language = currentFlags.Language
}
if transcript, err = fabric.YouTube.GrabTranscript(videoId, language); err != nil {
return
}

View File

@ -10,6 +10,7 @@ import (
"google.golang.org/api/option"
"google.golang.org/api/youtube/v3"
"log"
"net/url"
"regexp"
"strconv"
"strings"
@ -61,17 +62,17 @@ func (o *YouTube) GetVideoId(url string) (ret string, err error) {
return
}
func (o *YouTube) GrabTranscriptForUrl(url string) (ret string, err error) {
func (o *YouTube) GrabTranscriptForUrl(url string, language string) (ret string, err error) {
var videoId string
if videoId, err = o.GetVideoId(url); err != nil {
return
}
return o.GrabTranscript(videoId)
return o.GrabTranscript(videoId, language)
}
func (o *YouTube) GrabTranscript(videoId string) (ret string, err error) {
func (o *YouTube) GrabTranscript(videoId string, language string) (ret string, err error) {
var transcript string
if transcript, err = o.GrabTranscriptBase(videoId); err != nil {
if transcript, err = o.GrabTranscriptBase(videoId, language); err != nil {
err = fmt.Errorf("transcript not available. (%v)", err)
return
}
@ -89,14 +90,14 @@ func (o *YouTube) GrabTranscript(videoId string) (ret string, err error) {
return
}
func (o *YouTube) GrabTranscriptBase(videoId string) (ret string, err error) {
func (o *YouTube) GrabTranscriptBase(videoId string, language string) (ret string, err error) {
if err = o.initService(); err != nil {
return
}
url := "https://www.youtube.com/watch?v=" + videoId
watchUrl := "https://www.youtube.com/watch?v=" + videoId
var resp string
if resp, err = soup.Get(url); err != nil {
if resp, err = soup.Get(watchUrl); err != nil {
return
}
@ -117,6 +118,16 @@ func (o *YouTube) GrabTranscriptBase(videoId string) (ret string, err error) {
if len(captionTracks) > 0 {
transcriptURL := captionTracks[0].BaseURL
for _, captionTrack := range captionTracks {
parsedUrl, error := url.Parse(captionTrack.BaseURL)
if error != nil {
err = fmt.Errorf("error parsing caption track")
}
parsedUrlParams, _ := url.ParseQuery(parsedUrl.RawQuery)
if parsedUrlParams["lang"][0] == language {
transcriptURL = captionTrack.BaseURL
}
}
ret, err = soup.Get(transcriptURL)
return
}
@ -212,7 +223,7 @@ func (o *YouTube) Grab(url string, options *Options) (ret *VideoInfo, err error)
}
if options.Transcript {
if ret.Transcript, err = o.GrabTranscript(videoId); err != nil {
if ret.Transcript, err = o.GrabTranscript(videoId, "en"); err != nil {
return
}
}