Live Stream Audio Describer

Provides real-time audio descriptions of live stream content for visually impaired users

Size

12.3 KB

Version

1.0.1

Created

Mar 12, 2026

Updated

9 days ago

1// ==UserScript==
2// @name		Live Stream Audio Describer
3// @description		Provides real-time audio descriptions of live stream content for visually impaired users
4// @version		1.0.1
5// @match		https://*.youtube.com/*
6// @match		https://*.twitch.tv/*
7// @match		https://*.vimeo.com/*
8// @match		https://*.facebook.com/*
9// @match		https://*.instagram.com/*
10// @icon		https://robomonkey.io/favicon.ico
11// ==/UserScript==
12(function() {
13    'use strict';
14
    // Configuration — tunables for the capture cadence and speech output.
    const CONFIG = {
        captureInterval: 10000, // Capture frame every 10 seconds (ms)
        maxDescriptionLength: 200, // Cap on spoken description length, in characters
        speechRate: 1.0,   // SpeechSynthesisUtterance.rate (1.0 = normal speed)
        speechVolume: 0.8, // SpeechSynthesisUtterance.volume (0..1)
        speechPitch: 1.0   // SpeechSynthesisUtterance.pitch (1.0 = normal)
    };
23
    // State management — module-scoped flags shared by the functions below.
    let isEnabled = false;     // true while the periodic describe loop is running
    let captureTimer = null;   // setInterval handle for the capture loop
    let lastDescription = '';  // most recent AI description, used to skip repeats
    let isSpeaking = false;    // true while a speech utterance is playing
    let currentVideo = null;   // the <video> element currently being described
30
31    // Debounce utility
32    function debounce(func, wait) {
33        let timeout;
34        return function executedFunction(...args) {
35            const later = () => {
36                clearTimeout(timeout);
37                func(...args);
38            };
39            clearTimeout(timeout);
40            timeout = setTimeout(later, wait);
41        };
42    }
43
44    // Find video element on the page
45    function findVideoElement() {
46        // Try to find the main video element
47        const selectors = [
48            'video[src*="blob"]',
49            'video.html5-main-video', // YouTube
50            'video[data-a-player-type="video"]', // Twitch
51            'video.vp-video', // Vimeo
52            'video[playsinline]',
53            'video'
54        ];
55
56        for (const selector of selectors) {
57            const video = document.querySelector(selector);
58            if (video && video.readyState >= 2) {
59                console.log('Found video element:', selector);
60                return video;
61            }
62        }
63        return null;
64    }
65
66    // Capture frame from video
67    function captureVideoFrame(video) {
68        try {
69            const canvas = document.createElement('canvas');
70            canvas.width = video.videoWidth || 640;
71            canvas.height = video.videoHeight || 480;
72            
73            const ctx = canvas.getContext('2d');
74            ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
75            
76            // Convert to base64 image
77            const imageData = canvas.toDataURL('image/jpeg', 0.8);
78            console.log('Captured video frame, size:', imageData.length);
79            return imageData;
80        } catch (error) {
81            console.error('Error capturing video frame:', error);
82            return null;
83        }
84    }
85
    // Ask the RM AI backend to describe a captured frame.
    // Returns the description string, or null on any failure.
    // NOTE(review): the base64 data URL is appended inline to the text
    // prompt — presumably RM.aiCall accepts images embedded this way;
    // verify against the RM API, since many vision APIs require the image
    // in a separate structured field rather than in the prompt text.
    async function getImageDescription(imageData) {
        try {
            console.log('Requesting AI description...');
            
            const prompt = `You are an audio describer for visually impaired users watching a live stream. 
Describe what you see in this video frame in a clear, concise way (max 2-3 sentences). 
Focus on: main subjects, actions, important visual elements, text on screen, and overall scene context.
Be natural and conversational, as if describing to a friend.

Describe this image:`;

            const description = await RM.aiCall(prompt + '\n\nImage: ' + imageData);
            console.log('Received AI description:', description);
            return description;
        } catch (error) {
            console.error('Error getting AI description:', error);
            return null;
        }
    }
106
107    // Speak the description using Web Speech API
108    function speakDescription(text) {
109        if (!text || isSpeaking) {
110            return;
111        }
112
113        // Check if speech synthesis is available
114        if (!('speechSynthesis' in window)) {
115            console.error('Speech synthesis not supported');
116            return;
117        }
118
119        try {
120            isSpeaking = true;
121            const utterance = new SpeechSynthesisUtterance(text);
122            utterance.rate = CONFIG.speechRate;
123            utterance.volume = CONFIG.speechVolume;
124            utterance.pitch = CONFIG.speechPitch;
125            utterance.lang = 'en-US';
126
127            utterance.onend = () => {
128                isSpeaking = false;
129                console.log('Finished speaking description');
130            };
131
132            utterance.onerror = (event) => {
133                isSpeaking = false;
134                console.error('Speech synthesis error:', event);
135            };
136
137            // Cancel any ongoing speech
138            window.speechSynthesis.cancel();
139            window.speechSynthesis.speak(utterance);
140            console.log('Speaking description:', text);
141        } catch (error) {
142            isSpeaking = false;
143            console.error('Error speaking description:', error);
144        }
145    }
146
147    // Process video frame and generate description
148    async function processVideoFrame() {
149        if (!isEnabled || !currentVideo) {
150            return;
151        }
152
153        console.log('Processing video frame...');
154        
155        const imageData = captureVideoFrame(currentVideo);
156        if (!imageData) {
157            console.error('Failed to capture video frame');
158            return;
159        }
160
161        const description = await getImageDescription(imageData);
162        if (description && description !== lastDescription) {
163            lastDescription = description;
164            speakDescription(description);
165            
166            // Store description in state
167            await GM.setValue('lastDescription', description);
168            await GM.setValue('lastDescriptionTime', Date.now());
169        }
170    }
171
172    // Start capturing and describing
173    function startDescribing() {
174        if (isEnabled) {
175            return;
176        }
177
178        console.log('Starting live stream audio descriptions...');
179        isEnabled = true;
180        
181        // Update button state
182        updateButtonState();
183        
184        // Find video element
185        currentVideo = findVideoElement();
186        if (!currentVideo) {
187            console.error('No video element found');
188            speakDescription('No video stream detected. Please make sure a video is playing.');
189            isEnabled = false;
190            updateButtonState();
191            return;
192        }
193
194        // Announce start
195        speakDescription('Audio descriptions enabled. Describing video content every 10 seconds.');
196        
197        // Start periodic capture
198        processVideoFrame(); // Process immediately
199        captureTimer = setInterval(processVideoFrame, CONFIG.captureInterval);
200        
201        // Save state
202        GM.setValue('isEnabled', true);
203    }
204
205    // Stop capturing and describing
206    function stopDescribing() {
207        if (!isEnabled) {
208            return;
209        }
210
211        console.log('Stopping live stream audio descriptions...');
212        isEnabled = false;
213        
214        // Clear timer
215        if (captureTimer) {
216            clearInterval(captureTimer);
217            captureTimer = null;
218        }
219        
220        // Cancel any ongoing speech
221        if ('speechSynthesis' in window) {
222            window.speechSynthesis.cancel();
223        }
224        isSpeaking = false;
225        
226        // Update button state
227        updateButtonState();
228        
229        // Announce stop
230        speakDescription('Audio descriptions disabled.');
231        
232        // Save state
233        GM.setValue('isEnabled', false);
234    }
235
236    // Toggle describing on/off
237    function toggleDescribing() {
238        if (isEnabled) {
239            stopDescribing();
240        } else {
241            startDescribing();
242        }
243    }
244
245    // Create control button UI
246    function createControlButton() {
247        const button = document.createElement('button');
248        button.id = 'audio-describer-toggle';
249        button.innerHTML = `
250            <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
251                <path d="M1 12s4-8 11-8 11 8 11 8-4 8-11 8-11-8-11-8z"></path>
252                <circle cx="12" cy="12" r="3"></circle>
253            </svg>
254            <span>Audio Descriptions</span>
255        `;
256        
257        button.style.cssText = `
258            position: fixed;
259            bottom: 20px;
260            right: 20px;
261            z-index: 999999;
262            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
263            color: white;
264            border: none;
265            border-radius: 25px;
266            padding: 12px 20px;
267            font-size: 14px;
268            font-weight: 600;
269            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
270            cursor: pointer;
271            box-shadow: 0 4px 15px rgba(0, 0, 0, 0.3);
272            display: flex;
273            align-items: center;
274            gap: 8px;
275            transition: all 0.3s ease;
276            opacity: 0.9;
277        `;
278        
279        button.addEventListener('mouseenter', () => {
280            button.style.opacity = '1';
281            button.style.transform = 'scale(1.05)';
282        });
283        
284        button.addEventListener('mouseleave', () => {
285            button.style.opacity = '0.9';
286            button.style.transform = 'scale(1)';
287        });
288        
289        button.addEventListener('click', toggleDescribing);
290        
291        document.body.appendChild(button);
292        console.log('Control button created');
293        return button;
294    }
295
296    // Update button state
297    function updateButtonState() {
298        const button = document.getElementById('audio-describer-toggle');
299        if (!button) {
300            return;
301        }
302        
303        if (isEnabled) {
304            button.style.background = 'linear-gradient(135deg, #f093fb 0%, #f5576c 100%)';
305            button.innerHTML = `
306                <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
307                    <path d="M1 12s4-8 11-8 11 8 11 8-4 8-11 8-11-8-11-8z"></path>
308                    <circle cx="12" cy="12" r="3"></circle>
309                </svg>
310                <span>Describing...</span>
311            `;
312        } else {
313            button.style.background = 'linear-gradient(135deg, #667eea 0%, #764ba2 100%)';
314            button.innerHTML = `
315                <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
316                    <path d="M1 12s4-8 11-8 11 8 11 8-4 8-11 8-11-8-11-8z"></path>
317                    <circle cx="12" cy="12" r="3"></circle>
318                </svg>
319                <span>Audio Descriptions</span>
320            `;
321        }
322    }
323
324    // Watch for video element changes
325    function watchForVideo() {
326        const observer = new MutationObserver(debounce(() => {
327            if (!currentVideo || !document.contains(currentVideo)) {
328                console.log('Video element changed, searching for new video...');
329                currentVideo = findVideoElement();
330                
331                if (isEnabled && !currentVideo) {
332                    console.log('Video lost while describing, stopping...');
333                    stopDescribing();
334                }
335            }
336        }, 1000));
337        
338        observer.observe(document.body, {
339            childList: true,
340            subtree: true
341        });
342        
343        console.log('Video observer started');
344    }
345
346    // Initialize the extension
347    async function init() {
348        console.log('Live Stream Audio Describer initialized');
349        
350        // Wait for body to be ready
351        if (!document.body) {
352            setTimeout(init, 100);
353            return;
354        }
355        
356        // Create control button
357        setTimeout(() => {
358            createControlButton();
359        }, 2000);
360        
361        // Watch for video changes
362        watchForVideo();
363        
364        // Restore previous state
365        const savedState = await GM.getValue('isEnabled', false);
366        if (savedState) {
367            setTimeout(() => {
368                startDescribing();
369            }, 3000);
370        }
371        
372        // Keyboard shortcut: Alt+D to toggle
373        document.addEventListener('keydown', (e) => {
374            if (e.altKey && e.key === 'd') {
375                e.preventDefault();
376                toggleDescribing();
377            }
378        });
379        
380        console.log('Extension ready. Press Alt+D or click the button to toggle audio descriptions.');
381    }
382
    // Start the extension once the DOM is parseable.
    if (document.readyState === 'loading') {
        document.addEventListener('DOMContentLoaded', init);
    } else {
        // Document already parsed (userscript injected late) — run now.
        init();
    }
389})();