From e0d2361aab1baab4c08271e959bb06c3a47a12ab Mon Sep 17 00:00:00 2001 From: raisindetre Date: Sun, 17 Mar 2024 19:18:04 +1300 Subject: [PATCH 1/2] Added comment retrieval option to yt.py --- installer/client/cli/yt.py | 78 ++++++++++++++++++++++++++++---------- 1 file changed, 59 insertions(+), 19 deletions(-) diff --git a/installer/client/cli/yt.py b/installer/client/cli/yt.py index ff45be4..28a929d 100644 --- a/installer/client/cli/yt.py +++ b/installer/client/cli/yt.py @@ -17,6 +17,37 @@ def get_video_id(url): return match.group(1) if match else None +def get_comments(youtube, video_id): + # Fetch comments for the video + comments = [] + try: + response = youtube.commentThreads().list( + part="snippet", + videoId=video_id, + textFormat="plainText", + maxResults=100 # Adjust based on needs + ).execute() + + while response: + for item in response['items']: + comment = item['snippet']['topLevelComment']['snippet']['textDisplay'] + comments.append(comment) + + if 'nextPageToken' in response: + response = youtube.commentThreads().list( + part="snippet", + videoId=video_id, + textFormat="plainText", + pageToken=response['nextPageToken'], + maxResults=100 # Adjust based on needs + ).execute() + else: + break + except HttpError as e: + print(f"Failed to fetch comments: {e}") + return comments + + def main_function(url, options): # Load environment variables from .env file load_dotenv(os.path.expanduser("~/.config/fabric/.env")) @@ -38,9 +69,8 @@ def main_function(url, options): youtube = build("youtube", "v3", developerKey=api_key) # Get video details - video_response = ( - youtube.videos().list(id=video_id, part="contentDetails").execute() - ) + video_response = youtube.videos().list( + id=video_id, part="contentDetails").execute() # Extract video duration and convert to minutes duration_iso = video_response["items"][0]["contentDetails"]["duration"] @@ -50,41 +80,51 @@ def main_function(url, options): # Get video transcript try: transcript_list = 
YouTubeTranscriptApi.get_transcript(video_id) - transcript_text = " ".join([item["text"] - for item in transcript_list]) + transcript_text = " ".join([item["text"] for item in transcript_list]) transcript_text = transcript_text.replace("\n", " ") except Exception as e: transcript_text = f"Transcript not available. ({e})" + # Get comments if the flag is set + comments = [] + if options.comments: + comments = get_comments(youtube, video_id) + # Output based on options if options.duration: print(duration_minutes) elif options.transcript: print(transcript_text) + elif options.comments: + print(json.dumps(comments, indent=2)) else: - # Create JSON object - output = {"transcript": transcript_text, - "duration": duration_minutes} + # Create JSON object with all data + output = { + "transcript": transcript_text, + "duration": duration_minutes, + "comments": comments + } # Print JSON object - print(json.dumps(output)) + print(json.dumps(output, indent=2)) except HttpError as e: - - print( - f"Error: Failed to access YouTube API. Please check your YOUTUBE_API_KEY and ensure it is valid: {e}") + print(f"Error: Failed to access YouTube API. Please check your YOUTUBE_API_KEY and ensure it is valid: {e}") def main(): parser = argparse.ArgumentParser( - description='yt (video meta) extracts metadata about a video, such as the transcript and the video\'s duration. By Daniel Miessler.') - # Ensure 'url' is defined once + description='yt (video meta) extracts metadata about a video, such as the transcript, the video\'s duration, and now comments. 
By Daniel Miessler.') parser.add_argument('url', help='YouTube video URL') - parser.add_argument('--duration', action='store_true', - help='Output only the duration') - parser.add_argument('--transcript', action='store_true', - help='Output only the transcript') + parser.add_argument('--duration', action='store_true', help='Output only the duration') + parser.add_argument('--transcript', action='store_true', help='Output only the transcript') + parser.add_argument('--comments', action='store_true', help='Output the comments on the video') + args = parser.parse_args() if args.url is None: - args.url = sys.stdin.readline().strip() + print("Error: No URL provided.") + return main_function(args.url, args) + +if __name__ == "__main__": + main() From e6df0f93f06395dcb39a1dab796da1acf2de9cb7 Mon Sep 17 00:00:00 2001 From: raisindetre Date: Sun, 17 Mar 2024 20:29:56 +1300 Subject: [PATCH 2/2] yt comments includes reply threads. Readme updated. --- helper_files/README.md | 7 ++++--- installer/client/cli/yt.py | 41 +++++++++++++++++++++++--------------- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/helper_files/README.md b/helper_files/README.md index e8772f9..48cc87e 100644 --- a/helper_files/README.md +++ b/helper_files/README.md @@ -4,7 +4,7 @@ These are helper tools to work with Fabric. Examples include things like getting ## yt (YouTube) -`yt` is a command that uses the YouTube API to pull transcripts, get video duration, and other functions. It's primary function is to get a transcript from a video that can then be stitched (piped) into other Fabric Patterns. +`yt` is a command that uses the YouTube API to pull transcripts, pull user comments, get video duration, and other functions. Its primary function is to get a transcript from a video that can then be stitched (piped) into other Fabric Patterns.
```bash usage: yt [-h] [--duration] [--transcript] [url] @@ -15,9 +15,10 @@ positional arguments: url YouTube video URL options: - -h, --help show this help message and exit + -h, --help Show this help message and exit --duration Output only the duration --transcript Output only the transcript + --comments Output only the user comments ``` ## ts (Audio transcriptions) @@ -49,7 +50,7 @@ positional arguments: options: -h, --help show this help message and exit - +```` ## save `save` is a "tee-like" utility to pipeline saving of content, while keeping the output stream intact. Can optionally generate "frontmatter" for PKM utilities like Obsidian via the diff --git a/installer/client/cli/yt.py b/installer/client/cli/yt.py index 28a929d..b0449a0 100644 --- a/installer/client/cli/yt.py +++ b/installer/client/cli/yt.py @@ -18,36 +18,45 @@ def get_video_id(url): def get_comments(youtube, video_id): - # Fetch comments for the video comments = [] + try: - response = youtube.commentThreads().list( - part="snippet", + # Fetch top-level comments + request = youtube.commentThreads().list( + part="snippet,replies", videoId=video_id, textFormat="plainText", maxResults=100 # Adjust based on needs - ).execute() + ) - while response: + while request: + response = request.execute() for item in response['items']: - comment = item['snippet']['topLevelComment']['snippet']['textDisplay'] - comments.append(comment) - + # Top-level comment + topLevelComment = item['snippet']['topLevelComment']['snippet']['textDisplay'] + comments.append(topLevelComment) + + # Check if there are replies in the thread + if 'replies' in item: + for reply in item['replies']['comments']: + replyText = reply['snippet']['textDisplay'] + # Add incremental spacing and a dash for replies + comments.append(" - " + replyText) + + # Prepare the next page of comments, if available if 'nextPageToken' in response: - response = youtube.commentThreads().list( - part="snippet", - videoId=video_id, - textFormat="plainText", - 
pageToken=response['nextPageToken'], - maxResults=100 # Adjust based on needs - ).execute() + request = youtube.commentThreads().list_next( + previous_request=request, previous_response=response) else: - break + request = None + except HttpError as e: print(f"Failed to fetch comments: {e}") + return comments + def main_function(url, options): # Load environment variables from .env file load_dotenv(os.path.expanduser("~/.config/fabric/.env"))