Scrape sections from any website and convert them to clean HTML format
Size
12.8 KB
Version
1.0.1
Created
Mar 26, 2026
Updated
21 days ago
// ==UserScript==
// @name Website Section Scraper to HTML
// @description Scrape sections from any website and convert them to clean HTML format
// @version 1.0.1
// @match https://*.robomonkey.io/*
// @icon https://robomonkey.io/favicon.ico
// ==/UserScript==
8(function() {
9 'use strict';
10
/**
 * Build a debounced wrapper around `func`.
 *
 * Each call to the wrapper cancels any pending invocation and schedules a new
 * one `wait` milliseconds later, so only the last call of a burst runs.
 * The wrapped function receives the arguments and the `this` value of that
 * last call.
 *
 * @param {Function} func - Function to invoke once calls have settled.
 * @param {number} wait - Quiet period in milliseconds.
 * @returns {Function} Debounced wrapper; it returns nothing itself.
 */
function debounce(func, wait) {
    let timeout;
    return function executedFunction(...args) {
        clearTimeout(timeout);
        // The arrow callback keeps the wrapper's `this`, so a debounced method
        // still runs against the object it was called on.
        timeout = setTimeout(() => {
            timeout = undefined;
            func.apply(this, args);
        }, wait);
    };
}
23
/**
 * Build the floating scraper panel, inject its stylesheet, and start
 * element selection.
 *
 * Does nothing when an element with id `website-scraper-panel` already
 * exists, so calling it repeatedly is safe. The markup and CSS below are kept
 * free of listing line numbers: any stray text inside these template literals
 * would be rendered in the panel or would invalidate the CSS selectors.
 */
function createScraperUI() {
    // Guard against building a second panel.
    if (document.getElementById('website-scraper-panel')) {
        return;
    }

    const panel = document.createElement('div');
    panel.id = 'website-scraper-panel';
    panel.innerHTML = `
        <div id="scraper-header">
            <h3>Website Section Scraper</h3>
            <button id="scraper-close-btn">×</button>
        </div>
        <div id="scraper-content">
            <p>Click on any section of the page to scrape it as HTML</p>
            <div id="scraper-status">Hover over elements to select...</div>
            <div id="scraper-actions">
                <button id="scraper-copy-btn" disabled>Copy HTML</button>
                <button id="scraper-download-btn" disabled>Download HTML</button>
                <button id="scraper-clear-btn">Clear</button>
            </div>
            <div id="scraper-preview">
                <h4>HTML Preview:</h4>
                <pre id="scraper-html-output"></pre>
            </div>
        </div>
    `;

    document.body.appendChild(panel);

    // TM_addStyle is supplied by the userscript host and is not defined in this
    // file; presumably it injects the CSS text into the page.
    TM_addStyle(`
        #website-scraper-panel {
            position: fixed;
            top: 20px;
            right: 20px;
            width: 400px;
            max-height: 80vh;
            background: #ffffff;
            border: 2px solid #333;
            border-radius: 8px;
            box-shadow: 0 4px 20px rgba(0,0,0,0.3);
            z-index: 999999;
            font-family: Arial, sans-serif;
            overflow: hidden;
            display: flex;
            flex-direction: column;
        }

        #scraper-header {
            background: #333;
            color: #fff;
            padding: 12px 15px;
            display: flex;
            justify-content: space-between;
            align-items: center;
        }

        #scraper-header h3 {
            margin: 0;
            font-size: 16px;
            font-weight: bold;
        }

        #scraper-close-btn {
            background: transparent;
            border: none;
            color: #fff;
            font-size: 24px;
            cursor: pointer;
            padding: 0;
            width: 30px;
            height: 30px;
            line-height: 1;
        }

        #scraper-close-btn:hover {
            color: #ff4444;
        }

        #scraper-content {
            padding: 15px;
            overflow-y: auto;
            flex: 1;
        }

        #scraper-content p {
            margin: 0 0 10px 0;
            color: #333;
            font-size: 14px;
        }

        #scraper-status {
            background: #f0f0f0;
            padding: 10px;
            border-radius: 4px;
            margin-bottom: 15px;
            font-size: 13px;
            color: #555;
            min-height: 20px;
        }

        #scraper-actions {
            display: flex;
            gap: 8px;
            margin-bottom: 15px;
        }

        #scraper-actions button {
            flex: 1;
            padding: 8px 12px;
            border: none;
            border-radius: 4px;
            cursor: pointer;
            font-size: 13px;
            font-weight: bold;
            transition: all 0.2s;
        }

        #scraper-copy-btn {
            background: #4CAF50;
            color: white;
        }

        #scraper-copy-btn:hover:not(:disabled) {
            background: #45a049;
        }

        #scraper-download-btn {
            background: #2196F3;
            color: white;
        }

        #scraper-download-btn:hover:not(:disabled) {
            background: #0b7dda;
        }

        #scraper-clear-btn {
            background: #f44336;
            color: white;
        }

        #scraper-clear-btn:hover {
            background: #da190b;
        }

        #scraper-actions button:disabled {
            opacity: 0.5;
            cursor: not-allowed;
        }

        #scraper-preview {
            border-top: 1px solid #ddd;
            padding-top: 15px;
        }

        #scraper-preview h4 {
            margin: 0 0 10px 0;
            font-size: 14px;
            color: #333;
        }

        #scraper-html-output {
            background: #f5f5f5;
            border: 1px solid #ddd;
            border-radius: 4px;
            padding: 10px;
            max-height: 300px;
            overflow: auto;
            font-size: 12px;
            line-height: 1.4;
            white-space: pre-wrap;
            word-wrap: break-word;
            color: #333;
        }

        .scraper-highlight {
            outline: 3px solid #4CAF50 !important;
            outline-offset: 2px;
            cursor: pointer !important;
        }

        .scraper-selected {
            outline: 3px solid #2196F3 !important;
            outline-offset: 2px;
        }
    `);

    // The panel's buttons exist only now, so wiring must come after the append.
    setupEventListeners();
    enableElementSelection();
}
216
// Element currently chosen for scraping (carries the `scraper-selected`
// class), or null when nothing is selected.
let selectedElement = null;
// Gate checked by the hover/click handlers; no code visible in this script
// ever sets it to false.
let isSelectionMode = true;
219
/**
 * Wire up the panel's Close, Copy, Download and Clear buttons.
 *
 * Must run after the panel has been appended to the document, because the
 * buttons are looked up by id.
 */
function setupEventListeners() {
    const byId = (id) => document.getElementById(id);
    const readOutput = () => byId('scraper-html-output').textContent;

    // Close: drop the panel and stop reacting to hover/click on the page.
    byId('scraper-close-btn').addEventListener('click', () => {
        byId('website-scraper-panel').remove();
        disableElementSelection();
    });

    // Copy: GM.setClipboard is provided by the userscript host (not defined in
    // this file); it is awaited so a rejection is reported in the status line.
    byId('scraper-copy-btn').addEventListener('click', async () => {
        const htmlOutput = readOutput();
        try {
            await GM.setClipboard(htmlOutput);
            updateStatus('HTML copied to clipboard!');
        } catch (error) {
            console.error('Failed to copy:', error);
            updateStatus('Failed to copy HTML');
        }
    });

    // Download: save the preview text as an .html file via a temporary blob URL.
    byId('scraper-download-btn').addEventListener('click', () => {
        const blob = new Blob([readOutput()], { type: 'text/html' });
        const url = URL.createObjectURL(blob);
        // The anchor is intentionally NOT attached to the document: a detached
        // anchor's click never reaches the document-level selection handler,
        // which would otherwise preventDefault() the download.
        const link = document.createElement('a');
        link.href = url;
        link.download = `scraped-section-${Date.now()}.html`;
        link.click();
        // Revoke on a later task rather than synchronously, so the browser has
        // started reading the blob before its URL is invalidated.
        setTimeout(() => URL.revokeObjectURL(url), 0);
        updateStatus('HTML downloaded!');
    });

    // Clear: forget the current selection and reset the preview.
    byId('scraper-clear-btn').addEventListener('click', () => {
        clearSelection();
        updateStatus('Cleared. Hover over elements to select...');
    });
}
258
/**
 * Start tracking hover and click on the whole document.
 *
 * The click listener is registered in the capture phase: the handler calls
 * preventDefault()/stopPropagation() to keep the page from reacting to a
 * selection click, and that only works if it runs before the page's own
 * listeners on the target. A bubble-phase listener on `document` would run
 * last, after those listeners had already fired.
 */
function enableElementSelection() {
    document.addEventListener('mouseover', handleMouseOver);
    document.addEventListener('mouseout', handleMouseOut);
    document.addEventListener('click', handleClick, true);
}

/**
 * Stop tracking hover and click and strip every scraper outline from the page.
 *
 * The capture flag passed for `click` must match the one used when the
 * listener was added, otherwise removeEventListener silently does nothing.
 */
function disableElementSelection() {
    document.removeEventListener('mouseover', handleMouseOver);
    document.removeEventListener('mouseout', handleMouseOut);
    document.removeEventListener('click', handleClick, true);

    // Remove all highlights
    document.querySelectorAll('.scraper-highlight, .scraper-selected').forEach((el) => {
        el.classList.remove('scraper-highlight', 'scraper-selected');
    });
}
275
/**
 * Outline the element under the pointer while selection mode is on.
 * Elements inside the scraper panel itself are never outlined.
 *
 * @param {MouseEvent} e - `mouseover` event delivered to the document.
 */
function handleMouseOver(e) {
    const hovered = e.target;
    // Short-circuit keeps the panel lookup from running when selection is off.
    const shouldHighlight = isSelectionMode && !hovered.closest('#website-scraper-panel');
    if (shouldHighlight) {
        hovered.classList.add('scraper-highlight');
    }
}
288
/**
 * Remove the hover outline from the element the pointer just left.
 * Ignored while selection mode is off and for elements inside the panel.
 *
 * @param {MouseEvent} e - `mouseout` event delivered to the document.
 */
function handleMouseOut(e) {
    const left = e.target;
    const shouldClear = isSelectionMode && !left.closest('#website-scraper-panel');
    if (shouldClear) {
        left.classList.remove('scraper-highlight');
    }
}
300
/**
 * Make the clicked element the current selection and show its HTML.
 *
 * Clicks inside the scraper panel are left alone so its buttons keep working;
 * any other click is cancelled so the page does not act on it.
 *
 * @param {MouseEvent} e - `click` event delivered to the document.
 */
function handleClick(e) {
    if (!isSelectionMode) {
        return;
    }

    const clicked = e.target;
    if (clicked.closest('#website-scraper-panel')) {
        return;
    }

    e.preventDefault();
    e.stopPropagation();

    // Un-mark whatever was selected before.
    if (selectedElement) {
        selectedElement.classList.remove('scraper-selected');
    }

    // Swap the hover outline for the selection outline on the new element.
    selectedElement = clicked;
    const marks = clicked.classList;
    marks.remove('scraper-highlight');
    marks.add('scraper-selected');

    extractHTML(clicked);
}
327
/**
 * Serialize `element` to HTML, show it in the preview, enable the Copy and
 * Download buttons, and report the selection in the status line.
 *
 * The element is cloned first so the scraper's own outline classes can be
 * stripped from the output without touching the live page. Any failure is
 * logged and reported as 'Error extracting HTML' instead of being thrown.
 *
 * @param {Element} element - The element the user selected.
 */
function extractHTML(element) {
    const scraperClasses = ['scraper-highlight', 'scraper-selected'];

    // Strip the tool's classes; drop the attribute entirely if nothing is left
    // so the output does not carry a stray class="".
    const stripScraperClasses = (node) => {
        node.classList.remove(...scraperClasses);
        if (node.classList.length === 0) {
            node.removeAttribute('class');
        }
    };

    try {
        // Clone the element to avoid modifying the original
        const clonedElement = element.cloneNode(true);

        stripScraperClasses(clonedElement);
        clonedElement
            .querySelectorAll('.scraper-highlight, .scraper-selected')
            .forEach(stripScraperClasses);

        // Format the HTML for better readability and display it as text
        const formattedHTML = formatHTML(clonedElement.outerHTML);
        document.getElementById('scraper-html-output').textContent = formattedHTML;

        // Enable action buttons
        document.getElementById('scraper-copy-btn').disabled = false;
        document.getElementById('scraper-download-btn').disabled = false;

        // Build the label from the cleaned clone's classList rather than
        // element.className: className is not a string on SVG elements (so
        // .split would throw), repeated spaces would yield "..", and the live
        // element still carries the scraper's own selection class here.
        const tagName = element.tagName.toLowerCase();
        const classSuffix = Array.from(clonedElement.classList, (name) => `.${name}`).join('');
        updateStatus(`Selected: <${tagName}${classSuffix}>`);

        console.log('Scraped HTML from element:', element);
    } catch (error) {
        console.error('Error extracting HTML:', error);
        updateStatus('Error extracting HTML');
    }
}
363
// Elements that never have a closing tag; they must not deepen the indent.
const FORMAT_HTML_VOID_ELEMENTS = new Set([
    'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
    'link', 'meta', 'param', 'source', 'track', 'wbr',
]);

/**
 * Pretty-print an HTML string with two-space indentation.
 *
 * This is a lightweight line-based formatter, not a parser: a newline is
 * inserted between every adjacent `><` pair, each line is trimmed, and the
 * indent is adjusted from the shape of the line:
 *   - a line starting with `</` dedents before it is printed;
 *   - a line starting with an opening tag indents the lines after it, unless
 *     the tag is a void element, is self-closed with `/>`, or the line also
 *     contains a closing tag;
 *   - comments, doctype and text lines never change the indent.
 *
 * @param {string} html - Serialized HTML, e.g. an element's outerHTML.
 * @returns {string} The same markup, one tag boundary per line, indented.
 */
function formatHTML(html) {
    const indentSize = 2;
    let indent = 0;

    return html
        .replace(/></g, '>\n<')
        .split('\n')
        .map((line) => {
            const trimmed = line.trim();

            if (trimmed.startsWith('</')) {
                indent = Math.max(0, indent - indentSize);
            }

            const indentedLine = ' '.repeat(indent) + trimmed;

            // Requiring a letter after '<' excludes closing tags, comments
            // (<!-- -->) and <!DOCTYPE>, none of which open a nested level.
            const opening = /^<([a-zA-Z][^\s/>]*)/.exec(trimmed);
            const opensNestedLevel =
                opening !== null &&
                !FORMAT_HTML_VOID_ELEMENTS.has(opening[1].toLowerCase()) &&
                !trimmed.endsWith('/>') &&
                !trimmed.includes('</');
            if (opensNestedLevel) {
                indent += indentSize;
            }

            return indentedLine;
        })
        .join('\n');
}
391
/**
 * Show `message` in the panel's status line.
 *
 * The message is written as plain text: status strings such as
 * `Selected: <div.card>` contain angle brackets and page-controlled class
 * names, which would be parsed as markup (and vanish from view) if assigned
 * through innerHTML. If the panel has already been removed the call is a no-op.
 *
 * @param {string} message - Text to display.
 */
function updateStatus(message) {
    const statusNode = document.getElementById('scraper-status');
    if (!statusNode) {
        // Panel was closed, e.g. while a clipboard write was still pending.
        return;
    }
    statusNode.textContent = message;
}
395
/**
 * Forget the current selection and reset the panel to its empty state:
 * the selection outline is removed, the preview is emptied, and the Copy and
 * Download buttons are disabled again.
 */
function clearSelection() {
    const previous = selectedElement;
    if (previous) {
        previous.classList.remove('scraper-selected');
        selectedElement = null;
    }

    document.getElementById('scraper-html-output').textContent = '';
    for (const buttonId of ['scraper-copy-btn', 'scraper-download-btn']) {
        document.getElementById(buttonId).disabled = true;
    }
}
406
/**
 * Entry point: log start-up and build the scraper UI.
 *
 * UI creation is handed to TM_runBody, a userscript-host helper that is not
 * defined in this file; presumably it invokes the callback once
 * document.body is available.
 */
function init() {
    console.log('Website Section Scraper initialized');

    const buildUi = () => {
        createScraperUI();
        console.log('Scraper UI created and ready');
    };

    TM_runBody(buildUi);
}
417
418 // Start the extension
419 init();
420})();