Live Stream Audio Describer — provides real-time audio descriptions of live stream content for visually impaired users.
Size: 12.3 KB
Version: 1.0.1
Created: Mar 12, 2026
Updated: 9 days ago
// ==UserScript==
// @name Live Stream Audio Describer
// @description Provides real-time audio descriptions of live stream content for visually impaired users
// @version 1.0.1
// @match https://*.youtube.com/*
// @match https://*.twitch.tv/*
// @match https://*.vimeo.com/*
// @match https://*.facebook.com/*
// @match https://*.instagram.com/*
// @icon https://robomonkey.io/favicon.ico
// ==/UserScript==
(function() {
    'use strict';

    // Configuration
    const CONFIG = {
        captureInterval: 10000, // Capture frame every 10 seconds
        maxDescriptionLength: 200, // intended cap (chars) for spoken descriptions
        speechRate: 1.0, // SpeechSynthesisUtterance.rate
        speechVolume: 0.8, // SpeechSynthesisUtterance.volume (0..1)
        speechPitch: 1.0 // SpeechSynthesisUtterance.pitch
    };

    // State management
    let isEnabled = false; // true while periodic describing is active
    let captureTimer = null; // setInterval handle driving processVideoFrame
    let lastDescription = ''; // last spoken text; used to skip repeats
    let isSpeaking = false; // true while an utterance is in progress
    let currentVideo = null; // the <video> element currently being captured

31 // Debounce utility
32 function debounce(func, wait) {
33 let timeout;
34 return function executedFunction(...args) {
35 const later = () => {
36 clearTimeout(timeout);
37 func(...args);
38 };
39 clearTimeout(timeout);
40 timeout = setTimeout(later, wait);
41 };
42 }
43
44 // Find video element on the page
45 function findVideoElement() {
46 // Try to find the main video element
47 const selectors = [
48 'video[src*="blob"]',
49 'video.html5-main-video', // YouTube
50 'video[data-a-player-type="video"]', // Twitch
51 'video.vp-video', // Vimeo
52 'video[playsinline]',
53 'video'
54 ];
55
56 for (const selector of selectors) {
57 const video = document.querySelector(selector);
58 if (video && video.readyState >= 2) {
59 console.log('Found video element:', selector);
60 return video;
61 }
62 }
63 return null;
64 }
65
66 // Capture frame from video
67 function captureVideoFrame(video) {
68 try {
69 const canvas = document.createElement('canvas');
70 canvas.width = video.videoWidth || 640;
71 canvas.height = video.videoHeight || 480;
72
73 const ctx = canvas.getContext('2d');
74 ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
75
76 // Convert to base64 image
77 const imageData = canvas.toDataURL('image/jpeg', 0.8);
78 console.log('Captured video frame, size:', imageData.length);
79 return imageData;
80 } catch (error) {
81 console.error('Error capturing video frame:', error);
82 return null;
83 }
84 }
85
86 // Get AI description of the image
87 async function getImageDescription(imageData) {
88 try {
89 console.log('Requesting AI description...');
90
91 const prompt = `You are an audio describer for visually impaired users watching a live stream.
92Describe what you see in this video frame in a clear, concise way (max 2-3 sentences).
93Focus on: main subjects, actions, important visual elements, text on screen, and overall scene context.
94Be natural and conversational, as if describing to a friend.
95
96Describe this image:`;
97
98 const description = await RM.aiCall(prompt + '\n\nImage: ' + imageData);
99 console.log('Received AI description:', description);
100 return description;
101 } catch (error) {
102 console.error('Error getting AI description:', error);
103 return null;
104 }
105 }
106
107 // Speak the description using Web Speech API
108 function speakDescription(text) {
109 if (!text || isSpeaking) {
110 return;
111 }
112
113 // Check if speech synthesis is available
114 if (!('speechSynthesis' in window)) {
115 console.error('Speech synthesis not supported');
116 return;
117 }
118
119 try {
120 isSpeaking = true;
121 const utterance = new SpeechSynthesisUtterance(text);
122 utterance.rate = CONFIG.speechRate;
123 utterance.volume = CONFIG.speechVolume;
124 utterance.pitch = CONFIG.speechPitch;
125 utterance.lang = 'en-US';
126
127 utterance.onend = () => {
128 isSpeaking = false;
129 console.log('Finished speaking description');
130 };
131
132 utterance.onerror = (event) => {
133 isSpeaking = false;
134 console.error('Speech synthesis error:', event);
135 };
136
137 // Cancel any ongoing speech
138 window.speechSynthesis.cancel();
139 window.speechSynthesis.speak(utterance);
140 console.log('Speaking description:', text);
141 } catch (error) {
142 isSpeaking = false;
143 console.error('Error speaking description:', error);
144 }
145 }
146
147 // Process video frame and generate description
148 async function processVideoFrame() {
149 if (!isEnabled || !currentVideo) {
150 return;
151 }
152
153 console.log('Processing video frame...');
154
155 const imageData = captureVideoFrame(currentVideo);
156 if (!imageData) {
157 console.error('Failed to capture video frame');
158 return;
159 }
160
161 const description = await getImageDescription(imageData);
162 if (description && description !== lastDescription) {
163 lastDescription = description;
164 speakDescription(description);
165
166 // Store description in state
167 await GM.setValue('lastDescription', description);
168 await GM.setValue('lastDescriptionTime', Date.now());
169 }
170 }
171
172 // Start capturing and describing
173 function startDescribing() {
174 if (isEnabled) {
175 return;
176 }
177
178 console.log('Starting live stream audio descriptions...');
179 isEnabled = true;
180
181 // Update button state
182 updateButtonState();
183
184 // Find video element
185 currentVideo = findVideoElement();
186 if (!currentVideo) {
187 console.error('No video element found');
188 speakDescription('No video stream detected. Please make sure a video is playing.');
189 isEnabled = false;
190 updateButtonState();
191 return;
192 }
193
194 // Announce start
195 speakDescription('Audio descriptions enabled. Describing video content every 10 seconds.');
196
197 // Start periodic capture
198 processVideoFrame(); // Process immediately
199 captureTimer = setInterval(processVideoFrame, CONFIG.captureInterval);
200
201 // Save state
202 GM.setValue('isEnabled', true);
203 }
204
205 // Stop capturing and describing
206 function stopDescribing() {
207 if (!isEnabled) {
208 return;
209 }
210
211 console.log('Stopping live stream audio descriptions...');
212 isEnabled = false;
213
214 // Clear timer
215 if (captureTimer) {
216 clearInterval(captureTimer);
217 captureTimer = null;
218 }
219
220 // Cancel any ongoing speech
221 if ('speechSynthesis' in window) {
222 window.speechSynthesis.cancel();
223 }
224 isSpeaking = false;
225
226 // Update button state
227 updateButtonState();
228
229 // Announce stop
230 speakDescription('Audio descriptions disabled.');
231
232 // Save state
233 GM.setValue('isEnabled', false);
234 }
235
236 // Toggle describing on/off
237 function toggleDescribing() {
238 if (isEnabled) {
239 stopDescribing();
240 } else {
241 startDescribing();
242 }
243 }
244
245 // Create control button UI
246 function createControlButton() {
247 const button = document.createElement('button');
248 button.id = 'audio-describer-toggle';
249 button.innerHTML = `
250 <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
251 <path d="M1 12s4-8 11-8 11 8 11 8-4 8-11 8-11-8-11-8z"></path>
252 <circle cx="12" cy="12" r="3"></circle>
253 </svg>
254 <span>Audio Descriptions</span>
255 `;
256
257 button.style.cssText = `
258 position: fixed;
259 bottom: 20px;
260 right: 20px;
261 z-index: 999999;
262 background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
263 color: white;
264 border: none;
265 border-radius: 25px;
266 padding: 12px 20px;
267 font-size: 14px;
268 font-weight: 600;
269 font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
270 cursor: pointer;
271 box-shadow: 0 4px 15px rgba(0, 0, 0, 0.3);
272 display: flex;
273 align-items: center;
274 gap: 8px;
275 transition: all 0.3s ease;
276 opacity: 0.9;
277 `;
278
279 button.addEventListener('mouseenter', () => {
280 button.style.opacity = '1';
281 button.style.transform = 'scale(1.05)';
282 });
283
284 button.addEventListener('mouseleave', () => {
285 button.style.opacity = '0.9';
286 button.style.transform = 'scale(1)';
287 });
288
289 button.addEventListener('click', toggleDescribing);
290
291 document.body.appendChild(button);
292 console.log('Control button created');
293 return button;
294 }
295
296 // Update button state
297 function updateButtonState() {
298 const button = document.getElementById('audio-describer-toggle');
299 if (!button) {
300 return;
301 }
302
303 if (isEnabled) {
304 button.style.background = 'linear-gradient(135deg, #f093fb 0%, #f5576c 100%)';
305 button.innerHTML = `
306 <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
307 <path d="M1 12s4-8 11-8 11 8 11 8-4 8-11 8-11-8-11-8z"></path>
308 <circle cx="12" cy="12" r="3"></circle>
309 </svg>
310 <span>Describing...</span>
311 `;
312 } else {
313 button.style.background = 'linear-gradient(135deg, #667eea 0%, #764ba2 100%)';
314 button.innerHTML = `
315 <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
316 <path d="M1 12s4-8 11-8 11 8 11 8-4 8-11 8-11-8-11-8z"></path>
317 <circle cx="12" cy="12" r="3"></circle>
318 </svg>
319 <span>Audio Descriptions</span>
320 `;
321 }
322 }
323
324 // Watch for video element changes
325 function watchForVideo() {
326 const observer = new MutationObserver(debounce(() => {
327 if (!currentVideo || !document.contains(currentVideo)) {
328 console.log('Video element changed, searching for new video...');
329 currentVideo = findVideoElement();
330
331 if (isEnabled && !currentVideo) {
332 console.log('Video lost while describing, stopping...');
333 stopDescribing();
334 }
335 }
336 }, 1000));
337
338 observer.observe(document.body, {
339 childList: true,
340 subtree: true
341 });
342
343 console.log('Video observer started');
344 }
345
346 // Initialize the extension
347 async function init() {
348 console.log('Live Stream Audio Describer initialized');
349
350 // Wait for body to be ready
351 if (!document.body) {
352 setTimeout(init, 100);
353 return;
354 }
355
356 // Create control button
357 setTimeout(() => {
358 createControlButton();
359 }, 2000);
360
361 // Watch for video changes
362 watchForVideo();
363
364 // Restore previous state
365 const savedState = await GM.getValue('isEnabled', false);
366 if (savedState) {
367 setTimeout(() => {
368 startDescribing();
369 }, 3000);
370 }
371
372 // Keyboard shortcut: Alt+D to toggle
373 document.addEventListener('keydown', (e) => {
374 if (e.altKey && e.key === 'd') {
375 e.preventDefault();
376 toggleDescribing();
377 }
378 });
379
380 console.log('Extension ready. Press Alt+D or click the button to toggle audio descriptions.');
381 }
382
383 // Start the extension
384 if (document.readyState === 'loading') {
385 document.addEventListener('DOMContentLoaded', init);
386 } else {
387 init();
388 }
389})();