diff --git a/cli/cli.go b/cli/cli.go
index bb6b773..f8178de 100644
--- a/cli/cli.go
+++ b/cli/cli.go
@@ -5,6 +5,7 @@ import (
 	"os"
 	"path/filepath"
 	"strconv"
+	"strings"
 
 	"github.com/danielmiessler/fabric/core"
 	"github.com/danielmiessler/fabric/db"
@@ -101,6 +102,48 @@ func Cli() (message string, err error) {
 
 	// if none of the above currentFlags are set, run the initiate chat function
 
+	if currentFlags.YouTube != "" {
+		if !fabric.YouTube.IsConfigured() {
+			err = fmt.Errorf("YouTube is not configured, please run the setup procedure")
+			return
+		}
+
+		var videoId string
+		if videoId, err = fabric.YouTube.GetVideoId(currentFlags.YouTube); err != nil {
+			return
+		}
+
+		// With neither --transcript nor --comments given, fall back to the
+		// transcript so that a bare --youtube still sends something to chat.
+		if currentFlags.YouTubeTranscript || !currentFlags.YouTubeComments {
+			var transcript string
+			if transcript, err = fabric.YouTube.GrabTranscript(videoId); err != nil {
+				return
+			}
+
+			if currentFlags.Message != "" {
+				currentFlags.Message = currentFlags.Message + "\n" + transcript
+			} else {
+				currentFlags.Message = transcript
+			}
+		}
+
+		if currentFlags.YouTubeComments {
+			var comments []string
+			if comments, err = fabric.YouTube.GrabComments(videoId); err != nil {
+				return
+			}
+
+			commentsString := strings.Join(comments, "\n")
+
+			if currentFlags.Message != "" {
+				currentFlags.Message = currentFlags.Message + "\n" + commentsString
+			} else {
+				currentFlags.Message = commentsString
+			}
+		}
+	}
+
 	var chatter *core.Chatter
 	if chatter, err = fabric.GetChatter(currentFlags.Model, currentFlags.Stream); err != nil {
 		return
diff --git a/cli/flags.go b/cli/flags.go
index c4fde8d..ee8bdd4 100644
--- a/cli/flags.go
+++ b/cli/flags.go
@@ -34,6 +34,9 @@ type Flags struct {
 	Output             string `short:"o" long:"output" description:"Output to file" default:""`
 	LatestPatterns     string `short:"n" long:"latest" description:"Number of latest patterns to list" default:"0"`
 	ChangeDefaultModel bool   `short:"d" long:"changeDefaultModel" description:"Change default pattern"`
+	YouTube            string `short:"y" long:"youtube" description:"YouTube video url to grab transcript, comments from it and send to chat"`
+	YouTubeTranscript  bool   `long:"transcript" description:"Grab transcript from YouTube video and send to chat"`
+	YouTubeComments    bool   `long:"comments" description:"Grab comments from YouTube video and send to chat"`
 }
 
 // Init Initialize flags. returns a Flags struct and an error
diff --git a/go.mod b/go.mod
index 7831cce..d86e0fa 100644
--- a/go.mod
+++ b/go.mod
@@ -30,6 +30,7 @@ require (
 	dario.cat/mergo v1.0.0 // indirect
 	github.com/Microsoft/go-winio v0.6.1 // indirect
 	github.com/ProtonMail/go-crypto v1.0.0 // indirect
+	github.com/anaskhan96/soup v1.2.5
 	github.com/cloudflare/circl v1.3.7 // indirect
 	github.com/cyphar/filepath-securejoin v0.2.4 // indirect
 	github.com/davecgh/go-spew v1.1.1 // indirect
diff --git a/go.sum b/go.sum
index 94b3785..35b6b8e 100644
--- a/go.sum
+++ b/go.sum
@@ -19,6 +19,8 @@ github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migc
 github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM=
 github.com/ProtonMail/go-crypto v1.0.0 h1:LRuvITjQWX+WIfr930YHG2HNfjR1uOfyf5vE0kC2U78=
 github.com/ProtonMail/go-crypto v1.0.0/go.mod h1:EjAoLdwvbIOoOQr3ihjnSoLZRtE8azugULFRteWMNc0=
+github.com/anaskhan96/soup v1.2.5 h1:V/FHiusdTrPrdF4iA1YkVxsOpdNcgvqT1hG+YtcZ5hM=
+github.com/anaskhan96/soup v1.2.5/go.mod h1:6YnEp9A2yywlYdM4EgDz9NEHclocMepEtku7wg6Cq3s=
 github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8=
 github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4=
 github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
@@ -145,6 +147,7 @@ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSS
 github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
 github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
 github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
+github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
 github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
@@ -187,6 +190,7 @@ golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73r
 golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
 golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
 golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
 golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
 golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
diff --git a/youtube/youtube.go b/youtube/youtube.go
index c0fbc55..0e935a1 100644
--- a/youtube/youtube.go
+++ b/youtube/youtube.go
@@ -1,7 +1,18 @@
 package youtube
 
 import (
+	"context"
+	"encoding/json"
+	"flag"
+	"fmt"
+	"regexp"
+	"strconv"
+	"strings"
+
+	"github.com/anaskhan96/soup"
 	"github.com/danielmiessler/fabric/common"
+	"google.golang.org/api/option"
+	"google.golang.org/api/youtube/v3"
 )
 
 func NewYouTube() (ret *YouTube) {
@@ -22,4 +33,239 @@ func NewYouTube() (ret *YouTube) {
 type YouTube struct {
 	*common.Configurable
 	ApiKey *common.SetupQuestion
+
+	// service is the lazily created YouTube Data API client; see initService.
+	service *youtube.Service
+}
+
+// Regular expressions are compiled once at package scope rather than on every call.
+var (
+	// videoIDPattern captures the 11-character video ID from the supported YouTube URL shapes.
+	videoIDPattern = regexp.MustCompile(`(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})`)
+
+	// captionTracksPattern captures the JSON array that follows the "captionTracks" key in a watch-page script.
+	captionTracksPattern = regexp.MustCompile(`"captionTracks":(\[.*?\])`)
+
+	// durationPattern captures the hour, minute and second fields of a duration such as PT1H2M3S.
+	durationPattern = regexp.MustCompile(`(?i)PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?`)
+)
+
+// initService creates the YouTube Data API client from the configured API key
+// on first use; later calls reuse the client already stored on the receiver.
+func (o *YouTube) initService() (err error) {
+	if o.service == nil {
+		o.service, err = youtube.NewService(context.Background(), option.WithAPIKey(o.ApiKey.Value))
+	}
+	return
+}
+
+// GetVideoId extracts the 11-character video ID from a YouTube URL.
+// It returns an error when url does not match any supported URL shape.
+func (o *YouTube) GetVideoId(url string) (ret string, err error) {
+	match := videoIDPattern.FindStringSubmatch(url)
+	if len(match) < 2 {
+		err = fmt.Errorf("invalid YouTube URL, can't get video ID")
+		return
+	}
+	ret = match[1]
+	return
+}
+
+// GrabTranscriptForUrl resolves the video ID from url and returns that video's transcript text.
+func (o *YouTube) GrabTranscriptForUrl(url string) (ret string, err error) {
+	var videoId string
+	if videoId, err = o.GetVideoId(url); err != nil {
+		return
+	}
+	return o.GrabTranscript(videoId)
+}
+
+// GrabTranscript fetches the caption document for the video and returns
+// its text segments, each followed by a single space.
+func (o *YouTube) GrabTranscript(videoId string) (ret string, err error) {
+	var transcript string
+	if transcript, err = o.GrabTranscriptBase(videoId); err != nil {
+		err = fmt.Errorf("transcript not available. (%w)", err)
+		return
+	}
+
+	// The caption document is XML; every <text> element holds one caption segment.
+	doc := soup.HTMLParse(transcript)
+	var textBuilder strings.Builder
+	for _, textTag := range doc.FindAll("text") {
+		textBuilder.WriteString(textTag.Text())
+		textBuilder.WriteString(" ")
+	}
+	// Build the result once, after the loop, instead of on every iteration.
+	ret = textBuilder.String()
+	return
+}
+
+// GrabTranscriptBase scrapes the video's watch page for the embedded
+// captionTracks JSON and returns the raw document of the first track listed.
+// It returns an error when no script on the page yields a caption track.
+func (o *YouTube) GrabTranscriptBase(videoId string) (ret string, err error) {
+	var resp string
+	if resp, err = soup.Get("https://www.youtube.com/watch?v=" + videoId); err != nil {
+		return
+	}
+
+	for _, scriptTag := range soup.HTMLParse(resp).FindAll("script") {
+		match := captionTracksPattern.FindStringSubmatch(scriptTag.Text())
+		if len(match) < 2 {
+			continue
+		}
+
+		var captionTracks []struct {
+			BaseURL string `json:"baseUrl"`
+		}
+		if err = json.Unmarshal([]byte(match[1]), &captionTracks); err != nil {
+			return
+		}
+
+		// No language selection happens here: the first listed track is used as-is.
+		if len(captionTracks) > 0 {
+			return soup.Get(captionTracks[0].BaseURL)
+		}
+	}
+	err = fmt.Errorf("transcript not found")
+	return
+}
+
+// GrabComments requests up to 100 comment threads for the video from the
+// YouTube Data API and returns their plain text, one entry per comment.
+// Replies follow their top-level comment and are prefixed with " - ".
+func (o *YouTube) GrabComments(videoId string) (ret []string, err error) {
+	if err = o.initService(); err != nil {
+		return
+	}
+
+	call := o.service.CommentThreads.List([]string{"snippet", "replies"}).VideoId(videoId).TextFormat("plainText").MaxResults(100)
+	var response *youtube.CommentThreadListResponse
+	if response, err = call.Do(); err != nil {
+		// Hand the error to the caller without also logging it, so it is reported once.
+		return
+	}
+
+	for _, item := range response.Items {
+		ret = append(ret, item.Snippet.TopLevelComment.Snippet.TextDisplay)
+
+		if item.Replies == nil {
+			continue
+		}
+		for _, reply := range item.Replies.Comments {
+			ret = append(ret, " - "+reply.Snippet.TextDisplay)
+		}
+	}
+	return
+}
+
+// GrabDurationForUrl resolves the video ID from url and returns that video's duration in minutes.
+func (o *YouTube) GrabDurationForUrl(url string) (ret int, err error) {
+	var videoId string
+	if videoId, err = o.GetVideoId(url); err != nil {
+		return
+	}
+	return o.GrabDuration(videoId)
+}
+
+// GrabDuration reads the video's duration string from the YouTube Data
+// API and returns it in whole minutes; leftover seconds are truncated.
+func (o *YouTube) GrabDuration(videoId string) (ret int, err error) {
+	// Initialize the client here so the method does not depend on a prior call having done it.
+	if err = o.initService(); err != nil {
+		return
+	}
+
+	var videoResponse *youtube.VideoListResponse
+	if videoResponse, err = o.service.Videos.List([]string{"contentDetails"}).Id(videoId).Do(); err != nil {
+		err = fmt.Errorf("error getting video details: %w", err)
+		return
+	}
+
+	// Guard the index below: a response without items would otherwise panic.
+	if len(videoResponse.Items) == 0 {
+		err = fmt.Errorf("video not found: %s", videoId)
+		return
+	}
+	durationStr := videoResponse.Items[0].ContentDetails.Duration
+
+	matches := durationPattern.FindStringSubmatch(durationStr)
+	if len(matches) == 0 {
+		return 0, fmt.Errorf("invalid duration string: %s", durationStr)
+	}
+
+	// Fields absent from the string are empty captures; Atoi fails on them and yields 0, which is the intended value.
+	hours, _ := strconv.Atoi(matches[1])
+	minutes, _ := strconv.Atoi(matches[2])
+	seconds, _ := strconv.Atoi(matches[3])
+
+	ret = hours*60 + minutes + seconds/60
+	return
+}
+
+// Grab collects the pieces of information selected in options for the video at url.
+func (o *YouTube) Grab(url string, options *Options) (ret *VideoInfo, err error) {
+	var videoId string
+	if videoId, err = o.GetVideoId(url); err != nil {
+		return
+	}
+
+	ret = &VideoInfo{}
+
+	if options.Duration {
+		if ret.Duration, err = o.GrabDuration(videoId); err != nil {
+			err = fmt.Errorf("error parsing video duration: %w", err)
+			return
+		}
+	}
+
+	if options.Comments {
+		if ret.Comments, err = o.GrabComments(videoId); err != nil {
+			err = fmt.Errorf("error getting comments: %w", err)
+			return
+		}
+	}
+
+	if options.Transcript {
+		if ret.Transcript, err = o.GrabTranscript(videoId); err != nil {
+			return
+		}
+	}
+	return
+}
+
+// Options selects which pieces of video information Grab should fetch.
+type Options struct {
+	Duration   bool
+	Transcript bool
+	Comments   bool
+	Lang       string
+}
+
+// VideoInfo holds the data Grab collected for a single video.
+type VideoInfo struct {
+	Transcript string   `json:"transcript"`
+	Duration   int      `json:"duration"`
+	Comments   []string `json:"comments"`
+}
+
+// GrabByFlags registers the duration, transcript, comments and lang flags on
+// the process-wide flag set, parses the command line, and grabs the selected
+// data for the URL given as the first positional argument.
+func (o *YouTube) GrabByFlags() (ret *VideoInfo, err error) {
+	options := &Options{}
+	flag.BoolVar(&options.Duration, "duration", false, "Output only the duration")
+	flag.BoolVar(&options.Transcript, "transcript", false, "Output only the transcript")
+	flag.BoolVar(&options.Comments, "comments", false, "Output the comments on the video")
+	flag.StringVar(&options.Lang, "lang", "en", "Language for the transcript (default: English)")
+	flag.Parse()
+
+	// Report a missing URL to the caller instead of terminating the process from library code.
+	if flag.NArg() == 0 {
+		err = fmt.Errorf("no URL provided")
+		return
+	}
+
+	return o.Grab(flag.Arg(0), options)
 }