Extract all HTML and JavaScript from the current website and save as a single text file
Size
12.3 KB
Version
1.0.1
Created
Feb 4, 2026
Updated
13 days ago
1// ==UserScript==
2// @name Website Code Extractor
3// @description Extract all HTML and JavaScript from the current website and save as a single text file
4// @version 1.0.1
5// @match *://*/*
6// @icon https://www.gstatic.com/images/branding/searchlogo/ico/favicon.ico
7// @grant GM.xmlhttpRequest
8// @grant GM.download
9// ==/UserScript==
10(function() {
11 'use strict';
12
13 console.log('Website Code Extractor initialized');
14
// Substrings that mark a script URL as third-party. shouldExcludeUrl() tests
// each entry against the lower-cased URL, so every entry is lower-case and the
// order of the entries does not affect the outcome.
const excludePatterns = [
    // CDN and hosting providers
    'cloudflare',
    'cdnjs',
    'cdn.',
    'jsdelivr',
    'unpkg.com',
    'amazonaws',
    'polyfill.io',

    // Google services, analytics and ads
    'googleapis',
    'google-analytics',
    'googletagmanager',
    'gtag',
    'ga.js',
    'gstatic',
    'doubleclick',
    'googlesyndication',

    // Facebook
    'facebook.net',
    'fbcdn',

    // Widely used libraries
    'jquery',
    'bootstrap',
    'fontawesome',

    // Generic tracking keywords
    'beacon',
    'analytics',
    'tracking',
];
41
/**
 * Heuristically decide whether a piece of JavaScript looks obfuscated.
 *
 * The content is flagged as soon as one of three signals fires:
 *   1. density of hex literals (`0x…`), weighted by 10, exceeds 0.01;
 *   2. density of `_0x…` identifiers, weighted by 15, exceeds 0.01;
 *   3. more than 20% of the lines are longer than 500 characters.
 *
 * @param {string} content - Script source to inspect.
 * @returns {boolean} true when the content looks obfuscated; false for
 *     empty or missing content.
 */
function isObfuscatedJavaScript(content) {
    if (!content || content.length === 0) {
        return false;
    }

    const size = content.length;

    // Number of matches of a global regex in the content (0 when none).
    const occurrences = (regex) => {
        const found = content.match(regex);
        return found ? found.length : 0;
    };

    // Note: the hex pattern also matches the `0x…` tail of `_0x…` names.
    const hexScore = (occurrences(/0x[0-9a-fA-F]+/g) * 10) / size;
    if (hexScore > 0.01) {
        return true;
    }

    const mangledNameScore = (occurrences(/_0x[0-9a-fA-F]+/g) * 15) / size;
    if (mangledNameScore > 0.01) {
        return true;
    }

    // Very long lines are typical of packed or obfuscated bundles.
    const rows = content.split('\n');
    let oversized = 0;
    for (const row of rows) {
        if (row.length > 500) {
            oversized += 1;
        }
    }

    return oversized / rows.length > 0.2;
}
68
/**
 * Tell whether a script URL should be skipped as third-party.
 *
 * @param {string} url - Script URL to check.
 * @returns {boolean} true when the lower-cased URL contains at least one
 *     entry of excludePatterns.
 */
function shouldExcludeUrl(url) {
    const haystack = url.toLowerCase();
    for (const needle of excludePatterns) {
        if (haystack.includes(needle)) {
            return true;
        }
    }
    return false;
}
74
/**
 * Download a resource as text through the userscript manager's request API.
 *
 * Best-effort by design: the returned promise never rejects because of a
 * failed request. Any failure resolves with a bracketed "[ERROR: …]" marker
 * so the caller can keep going with the remaining resources.
 *
 * NOTE(review): some managers spell this API `GM.xmlHttpRequest` (capital H);
 * confirm the spelling granted in the metadata header matches the target manager.
 *
 * @param {string} url - Absolute URL of the resource.
 * @returns {Promise<string>} Response body, or an "[ERROR: …]" marker.
 */
function fetchResource(url) {
    return new Promise((resolve) => {
        GM.xmlhttpRequest({
            method: 'GET',
            url,
            // Without a timeout the ontimeout handler can never fire and a
            // stalled request would block the whole extraction.
            timeout: 30000,
            onload(response) {
                // onload also fires for 4xx/5xx answers; their body is an
                // error page, not the script we asked for.
                if (response.status >= 400) {
                    console.error('Failed to fetch:', url, `HTTP ${response.status}`);
                    resolve(`[ERROR: HTTP ${response.status}]`);
                    return;
                }
                resolve(response.responseText);
            },
            onerror(error) {
                console.error('Failed to fetch:', url, error);
                resolve('[ERROR: Could not fetch resource]');
            },
            ontimeout() {
                console.error('Timeout fetching:', url);
                resolve('[ERROR: Request timeout]');
            },
        });
    });
}
95
/**
 * Gather the page HTML, every external script that passes the filters, and
 * every non-empty inline script.
 *
 * External scripts are fetched one at a time so the status message can show
 * progress. A script is skipped (and counted as excluded) when its URL
 * matches shouldExcludeUrl() or its content matches isObfuscatedJavaScript().
 *
 * @param {(message: string) => void} statusCallback - Receives progress text.
 * @returns {Promise<{resources: Array<{url: string, type: string, content: string, size: number}>, excludedCount: number}>}
 *     Collected resources (page HTML first) and the number of skipped scripts.
 *     `size` is the content's string length.
 */
async function collectAllResources(statusCallback) {
    const resources = [];
    let excludedCount = 0;

    statusCallback('Collecting resources from page...');

    // Serialize a copy of the document with this script's own floating UI
    // (the container created by createExtractionUI) stripped out, so the
    // export reflects the site rather than the extractor. Working on a clone
    // leaves the live page untouched.
    const snapshot = document.documentElement.cloneNode(true);
    const ownUi = snapshot.querySelector('#code-extractor-container');
    if (ownUi) {
        ownUi.remove();
    }
    const currentPageHtml = snapshot.outerHTML;
    resources.push({
        url: window.location.href,
        type: 'HTML',
        content: currentPageHtml,
        size: currentPageHtml.length
    });

    // External scripts
    const scripts = document.querySelectorAll('script[src]');
    statusCallback(`Found ${scripts.length} script tags...`);

    for (let i = 0; i < scripts.length; i++) {
        let src = scripts[i].src;

        // Convert relative URLs to absolute
        if (src && !src.startsWith('http')) {
            src = new URL(src, window.location.href).href;
        }

        if (shouldExcludeUrl(src)) {
            console.log('Excluding (CDN):', src);
            excludedCount++;
            continue;
        }

        statusCallback(`Fetching script ${i + 1}/${scripts.length}...`);

        try {
            const content = await fetchResource(src);

            if (isObfuscatedJavaScript(content)) {
                console.log('Excluding (obfuscated):', src);
                excludedCount++;
                continue;
            }

            resources.push({
                url: src,
                type: 'JavaScript',
                content: content,
                size: content.length
            });
        } catch (error) {
            // One failing script must not abort the whole extraction.
            console.error('Error fetching script:', src, error);
        }
    }

    // Inline scripts
    const inlineScripts = document.querySelectorAll('script:not([src])');
    statusCallback(`Processing ${inlineScripts.length} inline scripts...`);

    inlineScripts.forEach((script, index) => {
        const content = script.textContent || script.innerText;
        if (content && content.trim().length > 0) {
            if (!isObfuscatedJavaScript(content)) {
                resources.push({
                    // The fragment is a synthetic label: the 1-based position
                    // among all inline scripts, including skipped ones.
                    url: `${window.location.href}#inline-script-${index + 1}`,
                    type: 'JavaScript (Inline)',
                    content: content,
                    size: content.length
                });
            } else {
                excludedCount++;
            }
        }
    });

    return { resources, excludedCount };
}
175
/**
 * Build the text of the combined export: a summary header followed by one
 * delimited section per resource.
 *
 * @param {Array<{url: string, type: string, content: string, size: number}>} resources
 *     Resources to include, in output order.
 * @param {number} excludedCount - Number of resources that were filtered out.
 * @returns {string} The complete file content, lines joined with "\n".
 */
function generateCombinedText(resources, excludedCount) {
    const separator = '='.repeat(80);

    const lines = [
        separator,
        `COMBINED FILES FROM WEBSITE: ${window.location.href}`,
        `Generated: ${new Date().toLocaleString()}`,
        `Total resources: ${resources.length}`,
        `Excluded resources: ${excludedCount}`,
        `Filter: HTML and JavaScript files only`,
        `Excluded: Third-party CDN scripts and obfuscated JavaScript`,
        separator,
        '',
    ];

    resources.forEach((resource, index) => {
        lines.push(
            '',
            separator,
            `FILE ${index + 1}: ${resource.url}`,
            `TYPE: ${resource.type}`,
            // The collector stores the string length of the content, which
            // is a character count, not a byte count.
            `SIZE: ${resource.size} characters`,
            separator,
            '',
            resource.content,
            ''
        );
    });

    return lines.join('\n');
}
205
/**
 * Offer the given text to the user as a UTF-8 `.txt` download.
 *
 * The file name is `<hostname>_combined_<timestamp>.txt`, where every
 * character of the hostname outside [a-z0-9] becomes "_" and the timestamp
 * is the current UTC time with second precision.
 *
 * @param {string} content - Text to save.
 * @returns {Promise<void>} Resolves once the download has been triggered.
 */
async function downloadCombinedFile(content) {
    const blob = new Blob([content], { type: 'text/plain;charset=utf-8' });
    const url = URL.createObjectURL(blob);
    const domain = window.location.hostname.replace(/[^a-z0-9]/gi, '_');
    // "2026-02-04T12:34:56.789Z" -> "2026-02-04T12-34-56-789Z" -> drop "-789Z".
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, -5);
    const filename = `${domain}_combined_${timestamp}.txt`;

    // Create a temporary link and click it
    const a = document.createElement('a');
    a.href = url;
    a.download = filename;
    document.body.appendChild(a);
    try {
        a.click();
    } finally {
        document.body.removeChild(a);
        // Revoking right after click() can cancel the download in browsers
        // that start reading the blob asynchronously, so release it later.
        setTimeout(() => URL.revokeObjectURL(url), 10000);
    }

    console.log('Downloaded:', filename);
}
225
/**
 * Build the floating "Extract Code" widget and attach it to document.body.
 *
 * The widget is a fixed container in the bottom-right corner holding a status
 * box (hidden until an extraction starts) and the trigger button. Clicking the
 * button runs collect -> generate -> download, reports progress in the status
 * box, and returns the widget to its idle look three seconds after finishing,
 * whether the run succeeded or failed.
 */
function createExtractionUI() {
    const wrapper = document.createElement('div');
    wrapper.id = 'code-extractor-container';
    wrapper.style.cssText = `
        position: fixed;
        bottom: 20px;
        right: 20px;
        z-index: 999999;
        font-family: Arial, sans-serif;
    `;

    const trigger = document.createElement('button');
    trigger.id = 'code-extractor-button';
    trigger.textContent = '📄 Extract Code';
    trigger.style.cssText = `
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        border: none;
        border-radius: 50px;
        padding: 15px 25px;
        font-size: 16px;
        font-weight: bold;
        cursor: pointer;
        box-shadow: 0 4px 15px rgba(0,0,0,0.2);
        transition: all 0.3s ease;
        display: flex;
        align-items: center;
        gap: 8px;
    `;

    const statusBox = document.createElement('div');
    statusBox.id = 'code-extractor-status';
    statusBox.style.cssText = `
        background: white;
        color: #333;
        border: 2px solid #667eea;
        border-radius: 8px;
        padding: 12px 20px;
        font-size: 14px;
        margin-bottom: 10px;
        box-shadow: 0 2px 10px rgba(0,0,0,0.1);
        display: none;
        max-width: 300px;
        word-wrap: break-word;
    `;

    // Hover feedback: grow slightly and deepen the shadow while hovered.
    const applyHoverLook = (transform, shadow) => {
        trigger.style.transform = transform;
        trigger.style.boxShadow = shadow;
    };
    trigger.addEventListener('mouseenter', () => {
        applyHoverLook('scale(1.05)', '0 6px 20px rgba(0,0,0,0.3)');
    });
    trigger.addEventListener('mouseleave', () => {
        applyHoverLook('scale(1)', '0 4px 15px rgba(0,0,0,0.2)');
    });

    // Writes a progress message into the status box.
    const report = (message) => {
        statusBox.textContent = message;
    };

    // Shows the final message in the given accent colour, then restores the
    // idle state (hidden status box, enabled button, default colours) after 3 s.
    const finishWith = (message, accent) => {
        statusBox.textContent = message;
        statusBox.style.borderColor = accent;
        statusBox.style.color = accent;

        setTimeout(() => {
            statusBox.style.display = 'none';
            trigger.disabled = false;
            trigger.textContent = '📄 Extract Code';
            statusBox.style.borderColor = '#667eea';
            statusBox.style.color = '#333';
        }, 3000);
    };

    trigger.addEventListener('click', async () => {
        // The button stays disabled for the whole run so that a second click
        // cannot start a parallel extraction.
        trigger.disabled = true;
        trigger.textContent = '⏳ Extracting...';
        statusBox.style.display = 'block';

        try {
            const { resources, excludedCount } = await collectAllResources(report);

            report('Generating combined file...');
            const combinedText = generateCombinedText(resources, excludedCount);

            report('Downloading file...');
            await downloadCombinedFile(combinedText);

            finishWith(`✅ Success! Downloaded ${resources.length} resources`, '#10b981');
        } catch (error) {
            console.error('Extraction error:', error);
            finishWith('❌ Error: ' + error.message, '#ef4444');
        }
    });

    // Status box sits above the button inside the fixed container.
    wrapper.appendChild(statusBox);
    wrapper.appendChild(trigger);
    document.body.appendChild(wrapper);

    console.log('Code Extractor UI created');
}
339
/**
 * Entry point: build the UI now if the document has finished parsing,
 * otherwise as soon as DOMContentLoaded fires.
 */
function init() {
    if (document.readyState !== 'loading') {
        createExtractionUI();
        return;
    }

    // Still parsing: the UI is attached to document.body, so wait for the DOM.
    document.addEventListener('DOMContentLoaded', createExtractionUI);
}
349
350 // Start the extension
351 init();
352})();