diff --git a/helper_files/README.md b/helper_files/README.md index e8772f9..48cc87e 100644 --- a/helper_files/README.md +++ b/helper_files/README.md @@ -4,7 +4,7 @@ These are helper tools to work with Fabric. Examples include things like getting ## yt (YouTube) -`yt` is a command that uses the YouTube API to pull transcripts, get video duration, and other functions. It's primary function is to get a transcript from a video that can then be stitched (piped) into other Fabric Patterns. +`yt` is a command that uses the YouTube API to pull transcripts, pull user comments, get video duration, and other functions. Its primary function is to get a transcript from a video that can then be stitched (piped) into other Fabric Patterns. ```bash usage: yt [-h] [--duration] [--transcript] [url] @@ -15,9 +15,10 @@ positional arguments: url YouTube video URL options: - -h, --help show this help message and exit + -h, --help Show this help message and exit --duration Output only the duration --transcript Output only the transcript + --comments Output only the user comments ``` ## ts (Audio transcriptions) @@ -49,7 +50,7 @@ positional arguments: options: -h, --help show this help message and exit - +``` ## save `save` is a "tee-like" utility to pipeline saving of content, while keeping the output stream intact. 
Can optionally generate "frontmatter" for PKM utilities like Obsidian via the diff --git a/installer/client/cli/yt.py b/installer/client/cli/yt.py index ff45be4..b0449a0 100644 --- a/installer/client/cli/yt.py +++ b/installer/client/cli/yt.py @@ -17,6 +17,46 @@ def get_video_id(url): return match.group(1) if match else None +def get_comments(youtube, video_id): + comments = [] + + try: + # Fetch top-level comments + request = youtube.commentThreads().list( + part="snippet,replies", + videoId=video_id, + textFormat="plainText", + maxResults=100 # Adjust based on needs + ) + + while request: + response = request.execute() + for item in response['items']: + # Top-level comment + topLevelComment = item['snippet']['topLevelComment']['snippet']['textDisplay'] + comments.append(topLevelComment) + + # Check if there are replies in the thread + if 'replies' in item: + for reply in item['replies']['comments']: + replyText = reply['snippet']['textDisplay'] + # Add incremental spacing and a dash for replies + comments.append(" - " + replyText) + + # Prepare the next page of comments, if available + if 'nextPageToken' in response: + request = youtube.commentThreads().list_next( + previous_request=request, previous_response=response) + else: + request = None + + except HttpError as e: + print(f"Failed to fetch comments: {e}") + + return comments + + + def main_function(url, options): # Load environment variables from .env file load_dotenv(os.path.expanduser("~/.config/fabric/.env")) @@ -38,9 +78,8 @@ def main_function(url, options): youtube = build("youtube", "v3", developerKey=api_key) # Get video details - video_response = ( - youtube.videos().list(id=video_id, part="contentDetails").execute() - ) + video_response = youtube.videos().list( + id=video_id, part="contentDetails").execute() # Extract video duration and convert to minutes duration_iso = video_response["items"][0]["contentDetails"]["duration"] @@ -50,41 +89,51 @@ def main_function(url, options): # Get video 
transcript try: transcript_list = YouTubeTranscriptApi.get_transcript(video_id) - transcript_text = " ".join([item["text"] - for item in transcript_list]) + transcript_text = " ".join([item["text"] for item in transcript_list]) transcript_text = transcript_text.replace("\n", " ") except Exception as e: transcript_text = f"Transcript not available. ({e})" + # Get comments if the flag is set + comments = [] + if options.comments: + comments = get_comments(youtube, video_id) + # Output based on options if options.duration: print(duration_minutes) elif options.transcript: print(transcript_text) + elif options.comments: + print(json.dumps(comments, indent=2)) else: - # Create JSON object - output = {"transcript": transcript_text, - "duration": duration_minutes} + # Create JSON object with all data + output = { + "transcript": transcript_text, + "duration": duration_minutes, + "comments": comments + } # Print JSON object - print(json.dumps(output)) + print(json.dumps(output, indent=2)) except HttpError as e: - - print( - f"Error: Failed to access YouTube API. Please check your YOUTUBE_API_KEY and ensure it is valid: {e}") + print(f"Error: Failed to access YouTube API. Please check your YOUTUBE_API_KEY and ensure it is valid: {e}") def main(): parser = argparse.ArgumentParser( - description='yt (video meta) extracts metadata about a video, such as the transcript and the video\'s duration. By Daniel Miessler.') - # Ensure 'url' is defined once + description='yt (video meta) extracts metadata about a video, such as the transcript, the video\'s duration, and now comments. 
By Daniel Miessler.') parser.add_argument('url', help='YouTube video URL') - parser.add_argument('--duration', action='store_true', - help='Output only the duration') - parser.add_argument('--transcript', action='store_true', - help='Output only the transcript') + parser.add_argument('--duration', action='store_true', help='Output only the duration') + parser.add_argument('--transcript', action='store_true', help='Output only the transcript') + parser.add_argument('--comments', action='store_true', help='Output the comments on the video') + args = parser.parse_args() if args.url is None: - args.url = sys.stdin.readline().strip() + print("Error: No URL provided.") + return main_function(args.url, args) + +if __name__ == "__main__": + main()