Real-time AI-powered audio descriptions for YouTube live streams to assist blind users
Size
15.2 KB
Version
1.1.1
Created
Mar 12, 2026
Updated
about 1 month ago
1// ==UserScript==
2// @name YouTube Live Stream Audio Describer
3// @description Real-time AI-powered audio descriptions for YouTube live streams to assist blind users
4// @version 1.1.1
5// @match https://*.youtube.com/*
6// @icon https://www.youtube.com/s/desktop/38f8912f/img/favicon_32x32.png
7// ==/UserScript==
8(function() {
9 'use strict';
10
    // Configuration — mutable at runtime via the control-panel sliders
    // (interval and speech-rate handlers in createControlPanel write here).
    const CONFIG = {
        captureInterval: 8000, // Capture frame every 8 seconds
        maxDescriptionLength: 150, // word budget passed into the AI prompt
        speechRate: 1.0,   // SpeechSynthesisUtterance.rate (0.5–2 via slider)
        speechVolume: 1.0, // SpeechSynthesisUtterance.volume
        speechPitch: 1.0   // SpeechSynthesisUtterance.pitch
    };

    // State management (shared across all functions in this userscript)
    let isDescribing = false;       // true while the capture loop is active
    let captureIntervalId = null;   // setInterval handle for the capture loop
    let lastDescription = '';       // last spoken text, used to skip duplicates
    let videoElement = null;        // current <video.html5-main-video> element
    let isSpeaking = false;         // true while an utterance is in flight
    let descriptionQueue = [];      // FIFO of descriptions awaiting speech
    let currentVideoUrl = '';       // last seen URL, for SPA navigation detection
28
29 // Utility: Debounce function
30 function debounce(func, wait) {
31 let timeout;
32 return function executedFunction(...args) {
33 const later = () => {
34 clearTimeout(timeout);
35 func(...args);
36 };
37 clearTimeout(timeout);
38 timeout = setTimeout(later, wait);
39 };
40 }
41
42 // Capture video frame as base64 image
43 function captureVideoFrame(video) {
44 try {
45 const canvas = document.createElement('canvas');
46 canvas.width = video.videoWidth || 640;
47 canvas.height = video.videoHeight || 360;
48
49 const ctx = canvas.getContext('2d');
50 ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
51
52 // Convert to base64 with reduced quality for faster processing
53 const dataUrl = canvas.toDataURL('image/jpeg', 0.7);
54 console.log('Frame captured successfully, size:', canvas.width, 'x', canvas.height);
55 return dataUrl;
56 } catch (error) {
57 console.error('Error capturing video frame:', error);
58 return null;
59 }
60 }
61
62 // Analyze frame with AI
63 async function analyzeFrame(frameDataUrl) {
64 try {
65 console.log('Sending frame to AI for analysis...');
66
67 const prompt = `You are describing a live video stream for a blind person. Analyze this video frame and provide a concise, clear description of what's happening. Focus on:
68- Main subjects or people in the frame
69- Their actions or activities
70- Important visual elements or text
71- Scene setting or environment
72- Any significant changes from typical content
73
74Keep the description under ${CONFIG.maxDescriptionLength} words and make it natural for text-to-speech. Be specific and helpful.
75
76Image data: ${frameDataUrl}`;
77
78 const description = await RM.aiCall(prompt);
79 console.log('AI analysis complete:', description);
80 return description;
81 } catch (error) {
82 console.error('Error analyzing frame with AI:', error);
83 return null;
84 }
85 }
86
87 // Speak description using Web Speech API
88 function speakDescription(text) {
89 if (!text || text === lastDescription) {
90 console.log('Skipping duplicate or empty description');
91 return;
92 }
93
94 // Add to queue
95 descriptionQueue.push(text);
96 lastDescription = text;
97
98 // Process queue if not already speaking
99 if (!isSpeaking) {
100 processDescriptionQueue();
101 }
102 }
103
104 // Process description queue
105 function processDescriptionQueue() {
106 if (descriptionQueue.length === 0) {
107 isSpeaking = false;
108 return;
109 }
110
111 isSpeaking = true;
112 const text = descriptionQueue.shift();
113
114 if ('speechSynthesis' in window) {
115 // Cancel any ongoing speech
116 window.speechSynthesis.cancel();
117
118 const utterance = new SpeechSynthesisUtterance(text);
119 utterance.rate = CONFIG.speechRate;
120 utterance.volume = CONFIG.speechVolume;
121 utterance.pitch = CONFIG.speechPitch;
122 utterance.lang = 'en-US';
123
124 utterance.onend = () => {
125 console.log('Finished speaking description');
126 // Process next in queue after a short delay
127 setTimeout(() => processDescriptionQueue(), 500);
128 };
129
130 utterance.onerror = (event) => {
131 console.error('Speech synthesis error:', event);
132 isSpeaking = false;
133 processDescriptionQueue();
134 };
135
136 console.log('Speaking:', text);
137 window.speechSynthesis.speak(utterance);
138
139 // Update status in control panel
140 updateStatus('Describing...');
141 } else {
142 console.error('Speech synthesis not supported');
143 isSpeaking = false;
144 }
145 }
146
147 // Main capture and describe loop
148 async function captureAndDescribe() {
149 if (!isDescribing || !videoElement) {
150 return;
151 }
152
153 console.log('Starting frame capture and analysis...');
154 updateStatus('Capturing frame...');
155
156 const frameData = captureVideoFrame(videoElement);
157 if (!frameData) {
158 console.error('Failed to capture frame');
159 updateStatus('Error: Failed to capture frame');
160 return;
161 }
162
163 updateStatus('Analyzing with AI...');
164 const description = await analyzeFrame(frameData);
165
166 if (description) {
167 speakDescription(description);
168 } else {
169 updateStatus('Error: AI analysis failed');
170 }
171 }
172
173 // Start describing
174 async function startDescribing() {
175 if (isDescribing) {
176 console.log('Already describing');
177 return;
178 }
179
180 videoElement = document.querySelector('video.html5-main-video');
181 if (!videoElement) {
182 alert('No video found on this page. Please navigate to a YouTube video or live stream.');
183 return;
184 }
185
186 console.log('Starting audio descriptions...');
187 isDescribing = true;
188 updateControlPanel();
189
190 // Initial description
191 await captureAndDescribe();
192
193 // Set up interval for continuous descriptions
194 captureIntervalId = setInterval(captureAndDescribe, CONFIG.captureInterval);
195
196 updateStatus('Active - Describing every ' + (CONFIG.captureInterval / 1000) + ' seconds');
197 }
198
199 // Stop describing
200 function stopDescribing() {
201 console.log('Stopping audio descriptions...');
202 isDescribing = false;
203
204 if (captureIntervalId) {
205 clearInterval(captureIntervalId);
206 captureIntervalId = null;
207 }
208
209 // Stop any ongoing speech
210 if ('speechSynthesis' in window) {
211 window.speechSynthesis.cancel();
212 }
213
214 // Clear queue
215 descriptionQueue = [];
216 isSpeaking = false;
217 lastDescription = '';
218
219 updateControlPanel();
220 updateStatus('Stopped');
221 }
222
223 // Update status display
224 function updateStatus(message) {
225 const statusElement = document.getElementById('audio-describer-status');
226 if (statusElement) {
227 statusElement.textContent = message;
228 }
229 }
230
231 // Create control panel UI
232 function createControlPanel() {
233 // Remove existing panel if any
234 const existing = document.getElementById('audio-describer-panel');
235 if (existing) {
236 existing.remove();
237 }
238
239 const panel = document.createElement('div');
240 panel.id = 'audio-describer-panel';
241 panel.style.cssText = `
242 position: fixed;
243 top: 80px;
244 right: 20px;
245 background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
246 color: white;
247 padding: 20px;
248 border-radius: 12px;
249 box-shadow: 0 8px 32px rgba(0, 0, 0, 0.3);
250 z-index: 10000;
251 font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
252 min-width: 280px;
253 backdrop-filter: blur(10px);
254 `;
255
256 panel.innerHTML = `
257 <div style="margin-bottom: 15px;">
258 <h3 style="margin: 0 0 5px 0; font-size: 16px; font-weight: 600;">🎙️ Audio Describer</h3>
259 <div id="audio-describer-status" style="font-size: 12px; opacity: 0.9; margin-top: 5px;">Ready</div>
260 </div>
261
262 <button id="audio-describer-toggle" style="
263 width: 100%;
264 padding: 12px;
265 background: white;
266 color: #667eea;
267 border: none;
268 border-radius: 8px;
269 font-size: 14px;
270 font-weight: 600;
271 cursor: pointer;
272 margin-bottom: 12px;
273 transition: all 0.3s ease;
274 ">Start Describing</button>
275
276 <div style="margin-top: 15px; padding-top: 15px; border-top: 1px solid rgba(255,255,255,0.2);">
277 <label style="display: block; font-size: 12px; margin-bottom: 8px; opacity: 0.9;">
278 Description Interval: <span id="interval-value">${CONFIG.captureInterval / 1000}s</span>
279 </label>
280 <input type="range" id="interval-slider" min="5" max="15" value="${CONFIG.captureInterval / 1000}"
281 style="width: 100%; cursor: pointer;">
282
283 <label style="display: block; font-size: 12px; margin: 12px 0 8px 0; opacity: 0.9;">
284 Speech Speed: <span id="speed-value">${CONFIG.speechRate}x</span>
285 </label>
286 <input type="range" id="speed-slider" min="0.5" max="2" step="0.1" value="${CONFIG.speechRate}"
287 style="width: 100%; cursor: pointer;">
288 </div>
289
290 <div style="margin-top: 15px; font-size: 11px; opacity: 0.7; line-height: 1.4;">
291 This extension captures video frames and provides AI-powered audio descriptions for accessibility.
292 </div>
293 `;
294
295 document.body.appendChild(panel);
296
297 // Add event listeners
298 const toggleButton = document.getElementById('audio-describer-toggle');
299 toggleButton.addEventListener('click', () => {
300 if (isDescribing) {
301 stopDescribing();
302 } else {
303 startDescribing();
304 }
305 });
306
307 // Interval slider
308 const intervalSlider = document.getElementById('interval-slider');
309 const intervalValue = document.getElementById('interval-value');
310 intervalSlider.addEventListener('input', (e) => {
311 const value = parseInt(e.target.value);
312 CONFIG.captureInterval = value * 1000;
313 intervalValue.textContent = value + 's';
314
315 // Restart interval if currently describing
316 if (isDescribing && captureIntervalId) {
317 clearInterval(captureIntervalId);
318 captureIntervalId = setInterval(captureAndDescribe, CONFIG.captureInterval);
319 updateStatus('Active - Describing every ' + value + ' seconds');
320 }
321 });
322
323 // Speed slider
324 const speedSlider = document.getElementById('speed-slider');
325 const speedValue = document.getElementById('speed-value');
326 speedSlider.addEventListener('input', (e) => {
327 const value = parseFloat(e.target.value);
328 CONFIG.speechRate = value;
329 speedValue.textContent = value.toFixed(1) + 'x';
330 });
331
332 console.log('Control panel created');
333 }
334
335 // Update control panel state
336 function updateControlPanel() {
337 const toggleButton = document.getElementById('audio-describer-toggle');
338 if (toggleButton) {
339 if (isDescribing) {
340 toggleButton.textContent = 'Stop Describing';
341 toggleButton.style.background = '#ff4757';
342 toggleButton.style.color = 'white';
343 } else {
344 toggleButton.textContent = 'Start Describing';
345 toggleButton.style.background = 'white';
346 toggleButton.style.color = '#667eea';
347 }
348 }
349 }
350
351 // Detect navigation and reset video element
352 function handleNavigation() {
353 const newUrl = window.location.href;
354
355 // Check if URL changed (navigation to new video)
356 if (newUrl !== currentVideoUrl) {
357 console.log('Navigation detected, URL changed from', currentVideoUrl, 'to', newUrl);
358 currentVideoUrl = newUrl;
359
360 // If currently describing, stop and restart with new video
361 if (isDescribing) {
362 console.log('Restarting descriptions for new video...');
363 stopDescribing();
364
365 // Wait for new video element to load
366 setTimeout(() => {
367 videoElement = document.querySelector('video.html5-main-video');
368 if (videoElement) {
369 console.log('New video element found, restarting descriptions');
370 startDescribing();
371 } else {
372 console.log('No video element found after navigation');
373 updateStatus('No video found - navigate to a video');
374 }
375 }, 2000);
376 } else {
377 // Just reset the video element reference
378 videoElement = null;
379 }
380 }
381 }
382
383 // Initialize extension
384 function init() {
385 console.log('YouTube Live Stream Audio Describer initialized');
386
387 // Store initial URL
388 currentVideoUrl = window.location.href;
389
390 // Wait for page to be ready
391 if (document.readyState === 'loading') {
392 document.addEventListener('DOMContentLoaded', createControlPanel);
393 } else {
394 createControlPanel();
395 }
396
397 // Re-create panel on navigation (YouTube is a SPA)
398 const observer = new MutationObserver(debounce(() => {
399 if (!document.getElementById('audio-describer-panel')) {
400 createControlPanel();
401 }
402 }, 1000));
403
404 observer.observe(document.body, {
405 childList: true,
406 subtree: true
407 });
408
409 // Detect URL changes for navigation (YouTube SPA)
410 let lastUrl = window.location.href;
411 new MutationObserver(() => {
412 const currentUrl = window.location.href;
413 if (currentUrl !== lastUrl) {
414 lastUrl = currentUrl;
415 handleNavigation();
416 }
417 }).observe(document.querySelector('title'), {
418 childList: true,
419 subtree: true
420 });
421
422 // Also listen to popstate for back/forward navigation
423 window.addEventListener('popstate', handleNavigation);
424
425 // Keyboard shortcut: Alt+D to toggle
426 document.addEventListener('keydown', (e) => {
427 if (e.altKey && e.key === 'd') {
428 e.preventDefault();
429 if (isDescribing) {
430 stopDescribing();
431 } else {
432 startDescribing();
433 }
434 }
435 });
436
437 console.log('Keyboard shortcut: Alt+D to toggle audio descriptions');
438 }
439
    // Start the extension immediately at userscript injection time; init()
    // itself defers DOM work until DOMContentLoaded when necessary.
    init();
442})();