Website Code Extractor

Extract all HTML and JavaScript from the current website and save as a single text file

Size

12.3 KB

Version

1.0.1

Created

Feb 4, 2026

Updated

13 days ago

// ==UserScript==
// @name		Website Code Extractor
// @description		Extract all HTML and JavaScript from the current website and save as a single text file
// @version		1.0.1
// @match		*://*/*
// @icon		https://www.gstatic.com/images/branding/searchlogo/ico/favicon.ico
// @grant		GM.xmlhttpRequest
// @grant		GM.download
// ==/UserScript==
10(function() {
11    'use strict';
12    
13    console.log('Website Code Extractor initialized');
14    
15    // Exclusion patterns for third-party CDN scripts
16    const excludePatterns = [
17        'cloudflare',
18        'googleapis',
19        'google-analytics',
20        'googletagmanager',
21        'cdnjs',
22        'jquery',
23        'bootstrap',
24        'fontawesome',
25        'cdn.',
26        'beacon',
27        'analytics',
28        'tracking',
29        'gtag',
30        'ga.js',
31        'gstatic',
32        'doubleclick',
33        'googlesyndication',
34        'facebook.net',
35        'fbcdn',
36        'amazonaws',
37        'jsdelivr',
38        'unpkg.com',
39        'polyfill.io'
40    ];
41    
42    // Function to detect obfuscated JavaScript
43    function isObfuscatedJavaScript(content) {
44        if (!content || content.length === 0) {
45            return false;
46        }
47        
48        // Check for obfuscation indicators
49        const hexPattern = /0x[0-9a-fA-F]+/g;
50        const obfuscatedVarPattern = /_0x[0-9a-fA-F]+/g;
51        
52        const hexMatches = (content.match(hexPattern) || []).length;
53        const obfuscatedVarMatches = (content.match(obfuscatedVarPattern) || []).length;
54        const totalLength = content.length;
55        
56        // Calculate ratios
57        const hexRatio = totalLength > 0 ? (hexMatches * 10) / totalLength : 0;
58        const obfuscatedVarRatio = totalLength > 0 ? (obfuscatedVarMatches * 15) / totalLength : 0;
59        
60        // Check for very long lines (common in obfuscated code)
61        const lines = content.split('\n');
62        const longLines = lines.filter(line => line.length > 500).length;
63        const longLineRatio = lines.length > 0 ? longLines / lines.length : 0;
64        
65        // File is likely obfuscated if conditions are met
66        return (hexRatio > 0.01) || (obfuscatedVarRatio > 0.01) || (longLineRatio > 0.2);
67    }
68    
69    // Function to check if URL should be excluded
70    function shouldExcludeUrl(url) {
71        const urlLower = url.toLowerCase();
72        return excludePatterns.some(pattern => urlLower.includes(pattern));
73    }
74    
75    // Function to fetch resource content
76    async function fetchResource(url) {
77        return new Promise((resolve, reject) => {
78            GM.xmlhttpRequest({
79                method: 'GET',
80                url: url,
81                onload: function(response) {
82                    resolve(response.responseText);
83                },
84                onerror: function(error) {
85                    console.error('Failed to fetch:', url, error);
86                    resolve('[ERROR: Could not fetch resource]');
87                },
88                ontimeout: function() {
89                    console.error('Timeout fetching:', url);
90                    resolve('[ERROR: Request timeout]');
91                }
92            });
93        });
94    }
95    
96    // Function to collect all resources
97    async function collectAllResources(statusCallback) {
98        const resources = [];
99        let excludedCount = 0;
100        
101        statusCallback('Collecting resources from page...');
102        
103        // Get current page HTML
104        const currentPageHtml = document.documentElement.outerHTML;
105        resources.push({
106            url: window.location.href,
107            type: 'HTML',
108            content: currentPageHtml,
109            size: currentPageHtml.length
110        });
111        
112        // Get all script tags
113        const scripts = document.querySelectorAll('script[src]');
114        statusCallback(`Found ${scripts.length} script tags...`);
115        
116        for (let i = 0; i < scripts.length; i++) {
117            const script = scripts[i];
118            let src = script.src;
119            
120            // Convert relative URLs to absolute
121            if (src && !src.startsWith('http')) {
122                src = new URL(src, window.location.href).href;
123            }
124            
125            if (shouldExcludeUrl(src)) {
126                console.log('Excluding (CDN):', src);
127                excludedCount++;
128                continue;
129            }
130            
131            statusCallback(`Fetching script ${i + 1}/${scripts.length}...`);
132            
133            try {
134                const content = await fetchResource(src);
135                
136                if (isObfuscatedJavaScript(content)) {
137                    console.log('Excluding (obfuscated):', src);
138                    excludedCount++;
139                    continue;
140                }
141                
142                resources.push({
143                    url: src,
144                    type: 'JavaScript',
145                    content: content,
146                    size: content.length
147                });
148            } catch (error) {
149                console.error('Error fetching script:', src, error);
150            }
151        }
152        
153        // Get inline scripts
154        const inlineScripts = document.querySelectorAll('script:not([src])');
155        statusCallback(`Processing ${inlineScripts.length} inline scripts...`);
156        
157        inlineScripts.forEach((script, index) => {
158            const content = script.textContent || script.innerText;
159            if (content && content.trim().length > 0) {
160                if (!isObfuscatedJavaScript(content)) {
161                    resources.push({
162                        url: `${window.location.href}#inline-script-${index + 1}`,
163                        type: 'JavaScript (Inline)',
164                        content: content,
165                        size: content.length
166                    });
167                } else {
168                    excludedCount++;
169                }
170            }
171        });
172        
173        return { resources, excludedCount };
174    }
175    
176    // Function to generate combined text file
177    function generateCombinedText(resources, excludedCount) {
178        const lines = [];
179        const separator = '='.repeat(80);
180        
181        lines.push(separator);
182        lines.push(`COMBINED FILES FROM WEBSITE: ${window.location.href}`);
183        lines.push(`Generated: ${new Date().toLocaleString()}`);
184        lines.push(`Total resources: ${resources.length}`);
185        lines.push(`Excluded resources: ${excludedCount}`);
186        lines.push(`Filter: HTML and JavaScript files only`);
187        lines.push(`Excluded: Third-party CDN scripts and obfuscated JavaScript`);
188        lines.push(separator);
189        lines.push('');
190        
191        resources.forEach((resource, index) => {
192            lines.push('');
193            lines.push(separator);
194            lines.push(`FILE ${index + 1}: ${resource.url}`);
195            lines.push(`TYPE: ${resource.type}`);
196            lines.push(`SIZE: ${resource.size} bytes`);
197            lines.push(separator);
198            lines.push('');
199            lines.push(resource.content);
200            lines.push('');
201        });
202        
203        return lines.join('\n');
204    }
205    
206    // Function to download the combined file
207    async function downloadCombinedFile(content) {
208        const blob = new Blob([content], { type: 'text/plain;charset=utf-8' });
209        const url = URL.createObjectURL(blob);
210        const domain = window.location.hostname.replace(/[^a-z0-9]/gi, '_');
211        const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, -5);
212        const filename = `${domain}_combined_${timestamp}.txt`;
213        
214        // Create a temporary link and click it
215        const a = document.createElement('a');
216        a.href = url;
217        a.download = filename;
218        document.body.appendChild(a);
219        a.click();
220        document.body.removeChild(a);
221        URL.revokeObjectURL(url);
222        
223        console.log('Downloaded:', filename);
224    }
225    
226    // Function to create the extraction button UI
227    function createExtractionUI() {
228        // Create the button container
229        const container = document.createElement('div');
230        container.id = 'code-extractor-container';
231        container.style.cssText = `
232            position: fixed;
233            bottom: 20px;
234            right: 20px;
235            z-index: 999999;
236            font-family: Arial, sans-serif;
237        `;
238        
239        // Create the main button
240        const button = document.createElement('button');
241        button.id = 'code-extractor-button';
242        button.textContent = '📄 Extract Code';
243        button.style.cssText = `
244            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
245            color: white;
246            border: none;
247            border-radius: 50px;
248            padding: 15px 25px;
249            font-size: 16px;
250            font-weight: bold;
251            cursor: pointer;
252            box-shadow: 0 4px 15px rgba(0,0,0,0.2);
253            transition: all 0.3s ease;
254            display: flex;
255            align-items: center;
256            gap: 8px;
257        `;
258        
259        // Create status display
260        const status = document.createElement('div');
261        status.id = 'code-extractor-status';
262        status.style.cssText = `
263            background: white;
264            color: #333;
265            border: 2px solid #667eea;
266            border-radius: 8px;
267            padding: 12px 20px;
268            font-size: 14px;
269            margin-bottom: 10px;
270            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
271            display: none;
272            max-width: 300px;
273            word-wrap: break-word;
274        `;
275        
276        // Add hover effects
277        button.addEventListener('mouseenter', () => {
278            button.style.transform = 'scale(1.05)';
279            button.style.boxShadow = '0 6px 20px rgba(0,0,0,0.3)';
280        });
281        
282        button.addEventListener('mouseleave', () => {
283            button.style.transform = 'scale(1)';
284            button.style.boxShadow = '0 4px 15px rgba(0,0,0,0.2)';
285        });
286        
287        // Add click handler
288        button.addEventListener('click', async () => {
289            button.disabled = true;
290            button.textContent = '⏳ Extracting...';
291            status.style.display = 'block';
292            
293            try {
294                const { resources, excludedCount } = await collectAllResources((msg) => {
295                    status.textContent = msg;
296                });
297                
298                status.textContent = 'Generating combined file...';
299                const combinedText = generateCombinedText(resources, excludedCount);
300                
301                status.textContent = 'Downloading file...';
302                await downloadCombinedFile(combinedText);
303                
304                status.textContent = `✅ Success! Downloaded ${resources.length} resources`;
305                status.style.borderColor = '#10b981';
306                status.style.color = '#10b981';
307                
308                setTimeout(() => {
309                    status.style.display = 'none';
310                    button.disabled = false;
311                    button.textContent = '📄 Extract Code';
312                    status.style.borderColor = '#667eea';
313                    status.style.color = '#333';
314                }, 3000);
315                
316            } catch (error) {
317                console.error('Extraction error:', error);
318                status.textContent = '❌ Error: ' + error.message;
319                status.style.borderColor = '#ef4444';
320                status.style.color = '#ef4444';
321                
322                setTimeout(() => {
323                    status.style.display = 'none';
324                    button.disabled = false;
325                    button.textContent = '📄 Extract Code';
326                    status.style.borderColor = '#667eea';
327                    status.style.color = '#333';
328                }, 3000);
329            }
330        });
331        
332        // Assemble the UI
333        container.appendChild(status);
334        container.appendChild(button);
335        document.body.appendChild(container);
336        
337        console.log('Code Extractor UI created');
338    }
339    
340    // Initialize the extension
341    function init() {
342        // Wait for page to be ready
343        if (document.readyState === 'loading') {
344            document.addEventListener('DOMContentLoaded', createExtractionUI);
345        } else {
346            createExtractionUI();
347        }
348    }
349    
350    // Start the extension
351    init();
352})();
Website Code Extractor | Robomonkey