Website Section Scraper to HTML

Scrape sections from any website and convert them to clean HTML format

Size

12.8 KB

Version

1.0.1

Created

Mar 26, 2026

Updated

21 days ago

1// ==UserScript==
2// @name		Website Section Scraper to HTML
3// @description		Scrape sections from any website and convert them to clean HTML format
4// @version		1.0.1
5// @match		https://*.robomonkey.io/*
6// @icon		https://robomonkey.io/favicon.ico
7// ==/UserScript==
8(function() {
9    'use strict';
10
11    // Debounce function to prevent excessive calls
/**
 * Wrap `func` so that a burst of calls collapses into a single trailing call.
 *
 * Every call to the returned wrapper restarts a `wait`-millisecond timer.
 * When the timer elapses without another call, `func` runs once with the
 * arguments and the `this` value of the most recent call.
 *
 * @param {Function} func - Function to invoke after the quiet period.
 * @param {number} wait - Delay in milliseconds handed to `setTimeout`.
 * @returns {Function} Debounced wrapper; the wrapper itself returns undefined.
 */
function debounce(func, wait) {
    let timeout;
    return function executedFunction(...args) {
        // Cancel the timer armed by the previous call, if any.
        clearTimeout(timeout);
        // The arrow function keeps the wrapper's `this`, so a debounced
        // method still sees the object it was called on.
        timeout = setTimeout(() => {
            timeout = undefined;
            func.apply(this, args);
        }, wait);
    };
}
23
24    // Create the scraper UI
/**
 * Build the floating scraper panel, register its stylesheet and activate it.
 *
 * Does nothing when an element with id `website-scraper-panel` is already in
 * the document. Otherwise the panel is appended to `document.body`, the CSS is
 * handed to `TM_addStyle`, and the panel buttons and page-level listeners are
 * attached through `setupEventListeners()` and `enableElementSelection()`.
 */
function createScraperUI() {
    const PANEL_ID = 'website-scraper-panel';

    // A previous call already built the panel: nothing to do.
    if (document.getElementById(PANEL_ID)) {
        return;
    }

    // Static markup: header with close button, status line, action buttons
    // and the <pre> that receives the scraped HTML.
    const panelMarkup = `
            <div id="scraper-header">
                <h3>Website Section Scraper</h3>
                <button id="scraper-close-btn">×</button>
            </div>
            <div id="scraper-content">
                <p>Click on any section of the page to scrape it as HTML</p>
                <div id="scraper-status">Hover over elements to select...</div>
                <div id="scraper-actions">
                    <button id="scraper-copy-btn" disabled>Copy HTML</button>
                    <button id="scraper-download-btn" disabled>Download HTML</button>
                    <button id="scraper-clear-btn">Clear</button>
                </div>
                <div id="scraper-preview">
                    <h4>HTML Preview:</h4>
                    <pre id="scraper-html-output"></pre>
                </div>
            </div>
        `;

    // Panel look and feel plus the two outline classes applied to page
    // elements while hovering (.scraper-highlight) and after a click
    // (.scraper-selected).
    const panelStyles = `
            #website-scraper-panel {
                position: fixed;
                top: 20px;
                right: 20px;
                width: 400px;
                max-height: 80vh;
                background: #ffffff;
                border: 2px solid #333;
                border-radius: 8px;
                box-shadow: 0 4px 20px rgba(0,0,0,0.3);
                z-index: 999999;
                font-family: Arial, sans-serif;
                overflow: hidden;
                display: flex;
                flex-direction: column;
            }

            #scraper-header {
                background: #333;
                color: #fff;
                padding: 12px 15px;
                display: flex;
                justify-content: space-between;
                align-items: center;
            }

            #scraper-header h3 {
                margin: 0;
                font-size: 16px;
                font-weight: bold;
            }

            #scraper-close-btn {
                background: transparent;
                border: none;
                color: #fff;
                font-size: 24px;
                cursor: pointer;
                padding: 0;
                width: 30px;
                height: 30px;
                line-height: 1;
            }

            #scraper-close-btn:hover {
                color: #ff4444;
            }

            #scraper-content {
                padding: 15px;
                overflow-y: auto;
                flex: 1;
            }

            #scraper-content p {
                margin: 0 0 10px 0;
                color: #333;
                font-size: 14px;
            }

            #scraper-status {
                background: #f0f0f0;
                padding: 10px;
                border-radius: 4px;
                margin-bottom: 15px;
                font-size: 13px;
                color: #555;
                min-height: 20px;
            }

            #scraper-actions {
                display: flex;
                gap: 8px;
                margin-bottom: 15px;
            }

            #scraper-actions button {
                flex: 1;
                padding: 8px 12px;
                border: none;
                border-radius: 4px;
                cursor: pointer;
                font-size: 13px;
                font-weight: bold;
                transition: all 0.2s;
            }

            #scraper-copy-btn {
                background: #4CAF50;
                color: white;
            }

            #scraper-copy-btn:hover:not(:disabled) {
                background: #45a049;
            }

            #scraper-download-btn {
                background: #2196F3;
                color: white;
            }

            #scraper-download-btn:hover:not(:disabled) {
                background: #0b7dda;
            }

            #scraper-clear-btn {
                background: #f44336;
                color: white;
            }

            #scraper-clear-btn:hover {
                background: #da190b;
            }

            #scraper-actions button:disabled {
                opacity: 0.5;
                cursor: not-allowed;
            }

            #scraper-preview {
                border-top: 1px solid #ddd;
                padding-top: 15px;
            }

            #scraper-preview h4 {
                margin: 0 0 10px 0;
                font-size: 14px;
                color: #333;
            }

            #scraper-html-output {
                background: #f5f5f5;
                border: 1px solid #ddd;
                border-radius: 4px;
                padding: 10px;
                max-height: 300px;
                overflow: auto;
                font-size: 12px;
                line-height: 1.4;
                white-space: pre-wrap;
                word-wrap: break-word;
                color: #333;
            }

            .scraper-highlight {
                outline: 3px solid #4CAF50 !important;
                outline-offset: 2px;
                cursor: pointer !important;
            }

            .scraper-selected {
                outline: 3px solid #2196F3 !important;
                outline-offset: 2px;
            }
        `;

    const panel = document.createElement('div');
    panel.id = PANEL_ID;
    panel.innerHTML = panelMarkup;
    document.body.appendChild(panel);

    TM_addStyle(panelStyles);

    // The listeners look the buttons up by id, so they are attached only
    // after the panel is in the document.
    setupEventListeners();
    enableElementSelection();
}
216
// Gate read by the mouse and click handlers before they react to page events.
let isSelectionMode = true;
// Page element currently carrying the `scraper-selected` outline, or null
// when nothing is selected.
let selectedElement = null;
219
/**
 * Attach click handlers to the four panel buttons (close, copy, download,
 * clear).
 *
 * The buttons are looked up by id, so the panel markup must already be in the
 * document when this runs.
 */
function setupEventListeners() {
    // How long the blob URL stays alive after the download was triggered.
    const REVOKE_DELAY_MS = 1000;

    // Close button: remove the panel and stop reacting to page events.
    document.getElementById('scraper-close-btn').addEventListener('click', () => {
        document.getElementById('website-scraper-panel').remove();
        disableElementSelection();
    });

    // Copy button: put the previewed HTML on the clipboard.
    document.getElementById('scraper-copy-btn').addEventListener('click', async () => {
        const htmlOutput = document.getElementById('scraper-html-output').textContent;
        try {
            await GM.setClipboard(htmlOutput);
            updateStatus('HTML copied to clipboard!');
        } catch (error) {
            console.error('Failed to copy:', error);
            updateStatus('Failed to copy HTML');
        }
    });

    // Download button: save the previewed HTML as a timestamped .html file.
    document.getElementById('scraper-download-btn').addEventListener('click', () => {
        const htmlOutput = document.getElementById('scraper-html-output').textContent;
        const blob = new Blob([htmlOutput], { type: 'text/html' });
        const url = URL.createObjectURL(blob);
        const a = document.createElement('a');
        a.href = url;
        a.download = `scraped-section-${Date.now()}.html`;
        a.click();
        // Revoking synchronously can invalidate the URL before the browser
        // has started reading it, so release it a moment later instead.
        setTimeout(() => URL.revokeObjectURL(url), REVOKE_DELAY_MS);
        updateStatus('HTML downloaded!');
    });

    // Clear button: drop the current selection and reset the preview.
    document.getElementById('scraper-clear-btn').addEventListener('click', () => {
        clearSelection();
        updateStatus('Cleared. Hover over elements to select...');
    });
}
258
/**
 * Start listening for hover and click events on the whole document.
 *
 * The click listener is registered for the capture phase so that handleClick
 * sees the event before listeners attached to page elements; only then can
 * its stopPropagation() call keep the page from reacting to the click.
 */
function enableElementSelection() {
    document.addEventListener('mouseover', handleMouseOver);
    document.addEventListener('mouseout', handleMouseOut);
    document.addEventListener('click', handleClick, true);
}

/**
 * Stop listening for page events, strip the scraper outline classes from
 * every element that still carries them and forget the current selection.
 */
function disableElementSelection() {
    document.removeEventListener('mouseover', handleMouseOver);
    document.removeEventListener('mouseout', handleMouseOut);
    // The capture flag must match the one used at registration, otherwise
    // the listener would stay attached.
    document.removeEventListener('click', handleClick, true);

    // Remove all highlights
    document.querySelectorAll('.scraper-highlight, .scraper-selected').forEach((el) => {
        el.classList.remove('scraper-highlight', 'scraper-selected');
    });

    // Drop the reference so a closed scraper does not keep the node alive.
    selectedElement = null;
}
275
/**
 * `mouseover` listener: outline the element under the pointer.
 *
 * Ignored while selection mode is off and for anything inside the scraper
 * panel itself.
 *
 * @param {Event} e - Event whose `target` is the hovered element.
 */
function handleMouseOver(e) {
    if (isSelectionMode) {
        const hovered = e.target;
        const insidePanel = Boolean(hovered.closest('#website-scraper-panel'));
        if (!insidePanel) {
            hovered.classList.add('scraper-highlight');
        }
    }
}
288
/**
 * `mouseout` listener: take the hover outline off the element the pointer
 * just left.
 *
 * Ignored while selection mode is off and for anything inside the scraper
 * panel itself.
 *
 * @param {Event} e - Event whose `target` is the element being left.
 */
function handleMouseOut(e) {
    // Short-circuit keeps `e.target` untouched while selection mode is off.
    if (!isSelectionMode || e.target.closest('#website-scraper-panel')) {
        return;
    }

    e.target.classList.remove('scraper-highlight');
}
300
/**
 * `click` listener: make the clicked page element the current selection and
 * show its HTML in the panel.
 *
 * Clicks are left alone while selection mode is off or when they land inside
 * the scraper panel. Otherwise the event's default action and propagation are
 * cancelled, the previous selection loses its outline, the clicked element
 * swaps its hover outline for the selected one, and `extractHTML` is called
 * with it.
 *
 * @param {Event} e - Click event; `target` is the clicked element.
 */
function handleClick(e) {
    if (!isSelectionMode || e.target.closest('#website-scraper-panel')) {
        return;
    }

    const clicked = e.target;

    e.preventDefault();
    e.stopPropagation();

    // Un-mark whatever was selected before.
    const previous = selectedElement;
    if (previous) {
        previous.classList.remove('scraper-selected');
    }

    // Promote the clicked element from "hovered" to "selected".
    selectedElement = clicked;
    clicked.classList.remove('scraper-highlight');
    clicked.classList.add('scraper-selected');

    extractHTML(clicked);
}
327
/**
 * Serialize `element` to formatted HTML, show it in the preview pane, enable
 * the copy/download buttons and report the selection in the status line.
 *
 * The element is deep-cloned first so the scraper's own outline classes can
 * be removed from the output without touching the live page. Any error is
 * logged and reported through the status line instead of being thrown.
 *
 * @param {Element} element - Page element to scrape.
 */
function extractHTML(element) {
    try {
        // Clone the element to avoid modifying the original
        const clonedElement = element.cloneNode(true);

        // Remove scraper classes from the clone and all of its descendants.
        clonedElement.classList.remove('scraper-highlight', 'scraper-selected');
        clonedElement.querySelectorAll('.scraper-highlight, .scraper-selected').forEach((el) => {
            el.classList.remove('scraper-highlight', 'scraper-selected');
        });

        // Format the HTML for better readability and display it.
        const formattedHTML = formatHTML(clonedElement.outerHTML);
        document.getElementById('scraper-html-output').textContent = formattedHTML;

        // Enable action buttons
        document.getElementById('scraper-copy-btn').disabled = false;
        document.getElementById('scraper-download-btn').disabled = false;

        // Build the status label from the cleaned clone's classList rather
        // than element.className: className is not a string on SVG elements,
        // repeated whitespace would yield empty segments, and the live
        // element still carries the scraper's own outline class.
        const tagName = element.tagName.toLowerCase();
        const className = Array.from(clonedElement.classList, (name) => `.${name}`).join('');
        updateStatus(`Selected: <${tagName}${className}>`);

        console.log('Scraped HTML from element:', element);
    } catch (error) {
        console.error('Error extracting HTML:', error);
        updateStatus('Error extracting HTML');
    }
}
363
/**
 * Pretty-print an HTML string with a simple line-based indenter.
 *
 * A line break is inserted between every adjacent `>` `<` pair, each line is
 * trimmed, and the indent grows by two spaces after a line that opens an
 * element and shrinks before a line that starts with a closing tag. Lines
 * that open and close on the same line, self-closed tags (`/>`), void
 * elements such as `<img>` or `<br>`, and comments/doctype declarations leave
 * the indent unchanged, since nothing will ever close them.
 *
 * @param {string} html - Markup to format.
 * @returns {string} The re-indented markup, lines joined with `\n`.
 */
function formatHTML(html) {
    const indentSize = 2;
    // Elements that never have a closing tag in serialized HTML.
    const voidElements = new Set([
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'param', 'source', 'track', 'wbr',
    ]);
    // Captures the tag name of a line that starts with an opening tag;
    // `</...`, `<!--` and `<!DOCTYPE` do not match.
    const openingTag = /^<([a-zA-Z][^\s/>]*)/;

    let indent = 0;
    const formattedLines = [];

    for (const line of html.replace(/></g, '>\n<').split('\n')) {
        const trimmed = line.trim();

        // A closing tag is printed one level shallower than its content.
        if (trimmed.startsWith('</')) {
            indent = Math.max(0, indent - indentSize);
        }

        formattedLines.push(' '.repeat(indent) + trimmed);

        const match = openingTag.exec(trimmed);
        const opensBlock = match !== null
            && !voidElements.has(match[1].toLowerCase())
            && !trimmed.endsWith('/>')
            && !trimmed.includes('</');
        if (opensBlock) {
            indent += indentSize;
        }
    }

    return formattedLines.join('\n');
}
391
/**
 * Show `message` in the panel's status line.
 *
 * The message is written as plain text: status strings such as
 * `Selected: <div.card>` contain angle brackets and class names taken from
 * the page, which must be displayed literally rather than parsed as markup.
 *
 * @param {string} message - Text to display.
 */
function updateStatus(message) {
    document.getElementById('scraper-status').textContent = message;
}
395
/**
 * Forget the current selection and reset the panel to its empty state:
 * the selected element (if any) loses its outline, the preview is emptied
 * and the copy/download buttons are disabled again.
 */
function clearSelection() {
    const previous = selectedElement;
    if (previous) {
        previous.classList.remove('scraper-selected');
        selectedElement = null;
    }

    const byId = (id) => document.getElementById(id);
    byId('scraper-html-output').textContent = '';
    for (const buttonId of ['scraper-copy-btn', 'scraper-download-btn']) {
        byId(buttonId).disabled = true;
    }
}
406
407    // Initialize the extension
/**
 * Entry point: log start-up, then hand `TM_runBody` a callback that builds
 * the scraper UI and logs once it is ready.
 */
function init() {
    // Passed to TM_runBody, which is expected to call it once the page body
    // is available.
    const onBodyReady = () => {
        createScraperUI();
        console.log('Scraper UI created and ready');
    };

    console.log('Website Section Scraper initialized');
    TM_runBody(onBodyReady);
}
417
// Start the extension: init() runs as soon as the script is evaluated and
// defers building the UI to the callback it passes to TM_runBody.
init();
420})();