YouTube Live Stream Audio Describer

Real-time AI-powered audio descriptions for YouTube live streams to assist blind users

Size

15.2 KB

Version

1.1.1

Created

Mar 12, 2026

Updated

about 1 month ago

1// ==UserScript==
2// @name		YouTube Live Stream Audio Describer
3// @description		Real-time AI-powered audio descriptions for YouTube live streams to assist blind users
4// @version		1.1.1
5// @match		https://*.youtube.com/*
6// @icon		https://www.youtube.com/s/desktop/38f8912f/img/favicon_32x32.png
7// ==/UserScript==
(function() {
    'use strict';

    // Tunable settings. captureInterval and speechRate are mutated at runtime
    // by the control-panel sliders, so this object must remain mutable.
    const CONFIG = {
        captureInterval: 8000, // Capture frame every 8 seconds (slider range: 5-15 s)
        maxDescriptionLength: 150, // word cap inserted into the AI prompt
        speechRate: 1.0,   // SpeechSynthesisUtterance.rate
        speechVolume: 1.0, // SpeechSynthesisUtterance.volume
        speechPitch: 1.0   // SpeechSynthesisUtterance.pitch
    };

    // Module-wide mutable state shared by the functions below.
    let isDescribing = false;     // true while the capture/describe loop is active
    let captureIntervalId = null; // setInterval handle for the capture loop
    let lastDescription = '';     // last spoken text, used to skip immediate repeats
    let videoElement = null;      // the <video> element currently being described
    let isSpeaking = false;       // true while our speech queue is being drained
    let descriptionQueue = [];    // FIFO of description strings awaiting speech
    let currentVideoUrl = '';     // last observed URL, for SPA navigation detection
29    // Utility: Debounce function
30    function debounce(func, wait) {
31        let timeout;
32        return function executedFunction(...args) {
33            const later = () => {
34                clearTimeout(timeout);
35                func(...args);
36            };
37            clearTimeout(timeout);
38            timeout = setTimeout(later, wait);
39        };
40    }
41
42    // Capture video frame as base64 image
43    function captureVideoFrame(video) {
44        try {
45            const canvas = document.createElement('canvas');
46            canvas.width = video.videoWidth || 640;
47            canvas.height = video.videoHeight || 360;
48            
49            const ctx = canvas.getContext('2d');
50            ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
51            
52            // Convert to base64 with reduced quality for faster processing
53            const dataUrl = canvas.toDataURL('image/jpeg', 0.7);
54            console.log('Frame captured successfully, size:', canvas.width, 'x', canvas.height);
55            return dataUrl;
56        } catch (error) {
57            console.error('Error capturing video frame:', error);
58            return null;
59        }
60    }
61
    // Analyze frame with AI
    //
    // Sends the captured frame (as a data URL embedded in the prompt text) to
    // the RM.aiCall backend and resolves to the description string, or null
    // on failure. NOTE(review): assumes RM.aiCall accepts inline image data
    // in the prompt — confirm against the RM API.
    async function analyzeFrame(frameDataUrl) {
        try {
            console.log('Sending frame to AI for analysis...');

            const prompt = `You are describing a live video stream for a blind person. Analyze this video frame and provide a concise, clear description of what's happening. Focus on:
- Main subjects or people in the frame
- Their actions or activities
- Important visual elements or text
- Scene setting or environment
- Any significant changes from typical content

Keep the description under ${CONFIG.maxDescriptionLength} words and make it natural for text-to-speech. Be specific and helpful.

Image data: ${frameDataUrl}`;

            const description = await RM.aiCall(prompt);
            console.log('AI analysis complete:', description);
            return description;
        } catch (error) {
            // Swallow the error and signal failure to the caller via null.
            console.error('Error analyzing frame with AI:', error);
            return null;
        }
    }
86
87    // Speak description using Web Speech API
88    function speakDescription(text) {
89        if (!text || text === lastDescription) {
90            console.log('Skipping duplicate or empty description');
91            return;
92        }
93
94        // Add to queue
95        descriptionQueue.push(text);
96        lastDescription = text;
97        
98        // Process queue if not already speaking
99        if (!isSpeaking) {
100            processDescriptionQueue();
101        }
102    }
103
    // Process description queue
    //
    // Drains descriptionQueue one utterance at a time through the Web Speech
    // API. Maintains isSpeaking while active and re-invokes itself from the
    // utterance's onend/onerror callbacks until the queue is empty.
    function processDescriptionQueue() {
        if (descriptionQueue.length === 0) {
            isSpeaking = false;
            return;
        }

        isSpeaking = true;
        const text = descriptionQueue.shift();

        if ('speechSynthesis' in window) {
            // Cancel any ongoing speech so the new description starts promptly.
            window.speechSynthesis.cancel();

            const utterance = new SpeechSynthesisUtterance(text);
            utterance.rate = CONFIG.speechRate;
            utterance.volume = CONFIG.speechVolume;
            utterance.pitch = CONFIG.speechPitch;
            utterance.lang = 'en-US';

            utterance.onend = () => {
                console.log('Finished speaking description');
                // Process next in queue after a short delay so consecutive
                // descriptions do not run together.
                setTimeout(() => processDescriptionQueue(), 500);
            };

            utterance.onerror = (event) => {
                console.error('Speech synthesis error:', event);
                // Reset the flag and try the next item rather than stalling.
                isSpeaking = false;
                processDescriptionQueue();
            };

            console.log('Speaking:', text);
            window.speechSynthesis.speak(utterance);

            // Update status in control panel
            updateStatus('Describing...');
        } else {
            console.error('Speech synthesis not supported');
            isSpeaking = false;
        }
    }
146
147    // Main capture and describe loop
148    async function captureAndDescribe() {
149        if (!isDescribing || !videoElement) {
150            return;
151        }
152
153        console.log('Starting frame capture and analysis...');
154        updateStatus('Capturing frame...');
155
156        const frameData = captureVideoFrame(videoElement);
157        if (!frameData) {
158            console.error('Failed to capture frame');
159            updateStatus('Error: Failed to capture frame');
160            return;
161        }
162
163        updateStatus('Analyzing with AI...');
164        const description = await analyzeFrame(frameData);
165        
166        if (description) {
167            speakDescription(description);
168        } else {
169            updateStatus('Error: AI analysis failed');
170        }
171    }
172
173    // Start describing
174    async function startDescribing() {
175        if (isDescribing) {
176            console.log('Already describing');
177            return;
178        }
179
180        videoElement = document.querySelector('video.html5-main-video');
181        if (!videoElement) {
182            alert('No video found on this page. Please navigate to a YouTube video or live stream.');
183            return;
184        }
185
186        console.log('Starting audio descriptions...');
187        isDescribing = true;
188        updateControlPanel();
189        
190        // Initial description
191        await captureAndDescribe();
192        
193        // Set up interval for continuous descriptions
194        captureIntervalId = setInterval(captureAndDescribe, CONFIG.captureInterval);
195        
196        updateStatus('Active - Describing every ' + (CONFIG.captureInterval / 1000) + ' seconds');
197    }
198
199    // Stop describing
200    function stopDescribing() {
201        console.log('Stopping audio descriptions...');
202        isDescribing = false;
203        
204        if (captureIntervalId) {
205            clearInterval(captureIntervalId);
206            captureIntervalId = null;
207        }
208
209        // Stop any ongoing speech
210        if ('speechSynthesis' in window) {
211            window.speechSynthesis.cancel();
212        }
213        
214        // Clear queue
215        descriptionQueue = [];
216        isSpeaking = false;
217        lastDescription = '';
218        
219        updateControlPanel();
220        updateStatus('Stopped');
221    }
222
223    // Update status display
224    function updateStatus(message) {
225        const statusElement = document.getElementById('audio-describer-status');
226        if (statusElement) {
227            statusElement.textContent = message;
228        }
229    }
230
    // Create control panel UI
    //
    // Builds (or rebuilds) the floating panel with the start/stop toggle and
    // the interval/speed sliders, appends it to <body>, and wires its event
    // handlers. Idempotent: an existing panel is removed first.
    function createControlPanel() {
        // Remove existing panel if any
        const existing = document.getElementById('audio-describer-panel');
        if (existing) {
            existing.remove();
        }

        const panel = document.createElement('div');
        panel.id = 'audio-describer-panel';
        panel.style.cssText = `
            position: fixed;
            top: 80px;
            right: 20px;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            padding: 20px;
            border-radius: 12px;
            box-shadow: 0 8px 32px rgba(0, 0, 0, 0.3);
            z-index: 10000;
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            min-width: 280px;
            backdrop-filter: blur(10px);
        `;

        panel.innerHTML = `
            <div style="margin-bottom: 15px;">
                <h3 style="margin: 0 0 5px 0; font-size: 16px; font-weight: 600;">🎙️ Audio Describer</h3>
                <div id="audio-describer-status" style="font-size: 12px; opacity: 0.9; margin-top: 5px;">Ready</div>
            </div>
            
            <button id="audio-describer-toggle" style="
                width: 100%;
                padding: 12px;
                background: white;
                color: #667eea;
                border: none;
                border-radius: 8px;
                font-size: 14px;
                font-weight: 600;
                cursor: pointer;
                margin-bottom: 12px;
                transition: all 0.3s ease;
            ">Start Describing</button>
            
            <div style="margin-top: 15px; padding-top: 15px; border-top: 1px solid rgba(255,255,255,0.2);">
                <label style="display: block; font-size: 12px; margin-bottom: 8px; opacity: 0.9;">
                    Description Interval: <span id="interval-value">${CONFIG.captureInterval / 1000}s</span>
                </label>
                <input type="range" id="interval-slider" min="5" max="15" value="${CONFIG.captureInterval / 1000}" 
                    style="width: 100%; cursor: pointer;">
                
                <label style="display: block; font-size: 12px; margin: 12px 0 8px 0; opacity: 0.9;">
                    Speech Speed: <span id="speed-value">${CONFIG.speechRate}x</span>
                </label>
                <input type="range" id="speed-slider" min="0.5" max="2" step="0.1" value="${CONFIG.speechRate}" 
                    style="width: 100%; cursor: pointer;">
            </div>
            
            <div style="margin-top: 15px; font-size: 11px; opacity: 0.7; line-height: 1.4;">
                This extension captures video frames and provides AI-powered audio descriptions for accessibility.
            </div>
        `;

        document.body.appendChild(panel);

        // Start/stop toggle button.
        const toggleButton = document.getElementById('audio-describer-toggle');
        toggleButton.addEventListener('click', () => {
            if (isDescribing) {
                stopDescribing();
            } else {
                startDescribing();
            }
        });

        // Interval slider: updates CONFIG.captureInterval (seconds -> ms).
        const intervalSlider = document.getElementById('interval-slider');
        const intervalValue = document.getElementById('interval-value');
        intervalSlider.addEventListener('input', (e) => {
            const value = parseInt(e.target.value);
            CONFIG.captureInterval = value * 1000;
            intervalValue.textContent = value + 's';
            
            // Restart interval if currently describing so the new period
            // takes effect immediately.
            if (isDescribing && captureIntervalId) {
                clearInterval(captureIntervalId);
                captureIntervalId = setInterval(captureAndDescribe, CONFIG.captureInterval);
                updateStatus('Active - Describing every ' + value + ' seconds');
            }
        });

        // Speed slider: updates CONFIG.speechRate (applied to the next utterance).
        const speedSlider = document.getElementById('speed-slider');
        const speedValue = document.getElementById('speed-value');
        speedSlider.addEventListener('input', (e) => {
            const value = parseFloat(e.target.value);
            CONFIG.speechRate = value;
            speedValue.textContent = value.toFixed(1) + 'x';
        });

        console.log('Control panel created');
    }
334
335    // Update control panel state
336    function updateControlPanel() {
337        const toggleButton = document.getElementById('audio-describer-toggle');
338        if (toggleButton) {
339            if (isDescribing) {
340                toggleButton.textContent = 'Stop Describing';
341                toggleButton.style.background = '#ff4757';
342                toggleButton.style.color = 'white';
343            } else {
344                toggleButton.textContent = 'Start Describing';
345                toggleButton.style.background = 'white';
346                toggleButton.style.color = '#667eea';
347            }
348        }
349    }
350
351    // Detect navigation and reset video element
352    function handleNavigation() {
353        const newUrl = window.location.href;
354        
355        // Check if URL changed (navigation to new video)
356        if (newUrl !== currentVideoUrl) {
357            console.log('Navigation detected, URL changed from', currentVideoUrl, 'to', newUrl);
358            currentVideoUrl = newUrl;
359            
360            // If currently describing, stop and restart with new video
361            if (isDescribing) {
362                console.log('Restarting descriptions for new video...');
363                stopDescribing();
364                
365                // Wait for new video element to load
366                setTimeout(() => {
367                    videoElement = document.querySelector('video.html5-main-video');
368                    if (videoElement) {
369                        console.log('New video element found, restarting descriptions');
370                        startDescribing();
371                    } else {
372                        console.log('No video element found after navigation');
373                        updateStatus('No video found - navigate to a video');
374                    }
375                }, 2000);
376            } else {
377                // Just reset the video element reference
378                videoElement = null;
379            }
380        }
381    }
382
383    // Initialize extension
384    function init() {
385        console.log('YouTube Live Stream Audio Describer initialized');
386        
387        // Store initial URL
388        currentVideoUrl = window.location.href;
389        
390        // Wait for page to be ready
391        if (document.readyState === 'loading') {
392            document.addEventListener('DOMContentLoaded', createControlPanel);
393        } else {
394            createControlPanel();
395        }
396
397        // Re-create panel on navigation (YouTube is a SPA)
398        const observer = new MutationObserver(debounce(() => {
399            if (!document.getElementById('audio-describer-panel')) {
400                createControlPanel();
401            }
402        }, 1000));
403
404        observer.observe(document.body, {
405            childList: true,
406            subtree: true
407        });
408
409        // Detect URL changes for navigation (YouTube SPA)
410        let lastUrl = window.location.href;
411        new MutationObserver(() => {
412            const currentUrl = window.location.href;
413            if (currentUrl !== lastUrl) {
414                lastUrl = currentUrl;
415                handleNavigation();
416            }
417        }).observe(document.querySelector('title'), {
418            childList: true,
419            subtree: true
420        });
421
422        // Also listen to popstate for back/forward navigation
423        window.addEventListener('popstate', handleNavigation);
424
425        // Keyboard shortcut: Alt+D to toggle
426        document.addEventListener('keydown', (e) => {
427            if (e.altKey && e.key === 'd') {
428                e.preventDefault();
429                if (isDescribing) {
430                    stopDescribing();
431                } else {
432                    startDescribing();
433                }
434            }
435        });
436
437        console.log('Keyboard shortcut: Alt+D to toggle audio descriptions');
438    }
439
    // Start the extension immediately; init() itself defers DOM work until
    // the document is ready.
    init();
})();