Contact Scraper - Auto Extract Emails & Phone Numbers

Automatically scrapes email addresses and phone numbers from webpages with smart detection of associated names. Export to Excel/CSV.

Size

24.2 KB

Version

1.0.1

Created

Mar 22, 2026

Updated

24 days ago

// ==UserScript==
// @name		Contact Scraper - Auto Extract Emails & Phone Numbers
// @description		Automatically scrapes email addresses and phone numbers from webpages with smart detection of associated names. Export to Excel/CSV.
// @version		1.0.1
// @match		*://*/*
// @icon		https://robomonkey.io/favicon.ico
// @grant		GM.getValue
// @grant		GM.setValue
// @grant		GM.listValues
// @grant		GM.deleteValue
// @require		https://cdnjs.cloudflare.com/ajax/libs/xlsx/0.18.5/xlsx.full.min.js
// ==/UserScript==
12(function() {
13    'use strict';
14
15    // Utility function to debounce
16    function debounce(func, wait) {
17        let timeout;
18        return function executedFunction(...args) {
19            const later = () => {
20                clearTimeout(timeout);
21                func(...args);
22            };
23            clearTimeout(timeout);
24            timeout = setTimeout(later, wait);
25        };
26    }
27
28    // Extract domain from URL
29    function getDomain(url) {
30        try {
31            const urlObj = new URL(url);
32            return urlObj.hostname.replace('www.', '');
33        } catch (e) {
34            return '';
35        }
36    }
37
38    // Get current page domain
39    function getCurrentDomain() {
40        return getDomain(window.location.href);
41    }
42
43    // Email regex pattern
44    function extractEmails(text) {
45        const emailRegex = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b/g;
46        return text.match(emailRegex) || [];
47    }
48
49    // Phone number regex patterns (international formats)
50    function extractPhones(text) {
51        const phonePatterns = [
52            /\+?\d{1,4}?[-.\s]?\(?\d{1,3}?\)?[-.\s]?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,9}/g,
53            /\(\d{3}\)\s*\d{3}[-.\s]?\d{4}/g,
54            /\d{3}[-.\s]\d{3}[-.\s]\d{4}/g,
55            /\d{10,}/g
56        ];
57        
58        let phones = [];
59        phonePatterns.forEach(pattern => {
60            const matches = text.match(pattern) || [];
61            phones = phones.concat(matches);
62        });
63        
64        // Filter and clean phone numbers
65        return phones
66            .map(p => p.trim())
67            .filter(p => {
68                // Remove non-digit characters for validation
69                const digits = p.replace(/\D/g, '');
70                // Valid phone should have 7-15 digits
71                return digits.length >= 7 && digits.length <= 15;
72            });
73    }
74
75    // Try to find associated name near contact info
76    function findAssociatedName(element) {
77        if (!element) return '';
78
79        // Search in parent and nearby elements
80        let searchElement = element;
81        let attempts = 0;
82        const maxAttempts = 5;
83
84        while (searchElement && attempts < maxAttempts) {
85            // Look for name patterns in the same element or parent
86            const text = searchElement.textContent || '';
87            
88            // Common name patterns
89            const namePatterns = [
90                /(?:Name|Contact|Person|Author|By):\s*([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)/i,
91                /([A-Z][a-z]+\s+[A-Z][a-z]+)(?=\s*[@\d\+\(])/,
92                /^([A-Z][a-z]+\s+[A-Z][a-z]+)/m
93            ];
94
95            for (const pattern of namePatterns) {
96                const match = text.match(pattern);
97                if (match && match[1]) {
98                    const name = match[1].trim();
99                    // Validate name (2-50 chars, letters and spaces)
100                    if (name.length >= 2 && name.length <= 50 && /^[A-Za-z\s]+$/.test(name)) {
101                        return name;
102                    }
103                }
104            }
105
106            // Check for specific HTML structures
107            const nameElement = searchElement.querySelector('[class*="name"], [class*="author"], [class*="contact"]');
108            if (nameElement) {
109                const name = nameElement.textContent.trim();
110                if (name.length >= 2 && name.length <= 50 && /^[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*$/.test(name)) {
111                    return name;
112                }
113            }
114
115            searchElement = searchElement.parentElement;
116            attempts++;
117        }
118
119        return '';
120    }
121
122    // Scrape contacts from the page
123    async function scrapeContacts() {
124        console.log('Starting contact scraping...');
125        
126        const currentDomain = getCurrentDomain();
127        const pageUrl = window.location.href;
128        const pageTitle = document.title;
129        const timestamp = new Date().toISOString();
130
131        // Get all text content
132        const bodyText = document.body.innerText || '';
133        
134        // Extract emails and phones
135        const emails = extractEmails(bodyText);
136        const phones = extractPhones(bodyText);
137
138        console.log(`Found ${emails.length} emails and ${phones.length} phones`);
139
140        const contacts = [];
141        const processedContacts = new Set();
142
143        // Process emails
144        for (const email of emails) {
145            const emailDomain = email.split('@')[1];
146            
147            // Skip if email belongs to current domain
148            if (emailDomain === currentDomain) {
149                console.log(`Skipping email from current domain: ${email}`);
150                continue;
151            }
152
153            // Find the element containing this email
154            const emailElements = Array.from(document.querySelectorAll('*')).filter(el => {
155                return el.textContent.includes(email) && el.children.length < 5;
156            });
157
158            let name = '';
159            if (emailElements.length > 0) {
160                name = findAssociatedName(emailElements[0]);
161            }
162
163            const contactKey = `${email}|${name}`;
164            if (!processedContacts.has(contactKey)) {
165                processedContacts.add(contactKey);
166                contacts.push({
167                    name: name || 'Unknown',
168                    email: email,
169                    phone: '',
170                    sourceDomain: currentDomain,
171                    pageUrl: pageUrl,
172                    pageTitle: pageTitle,
173                    dateCollected: timestamp
174                });
175            }
176        }
177
178        // Process phones
179        for (const phone of phones) {
180            // Find the element containing this phone
181            const phoneElements = Array.from(document.querySelectorAll('*')).filter(el => {
182                return el.textContent.includes(phone) && el.children.length < 5;
183            });
184
185            let name = '';
186            if (phoneElements.length > 0) {
187                name = findAssociatedName(phoneElements[0]);
188            }
189
190            const contactKey = `${phone}|${name}`;
191            if (!processedContacts.has(contactKey)) {
192                processedContacts.add(contactKey);
193                contacts.push({
194                    name: name || 'Unknown',
195                    email: '',
196                    phone: phone,
197                    sourceDomain: currentDomain,
198                    pageUrl: pageUrl,
199                    pageTitle: pageTitle,
200                    dateCollected: timestamp
201                });
202            }
203        }
204
205        // Save contacts to storage
206        if (contacts.length > 0) {
207            await saveContacts(contacts);
208            console.log(`Saved ${contacts.length} new contacts`);
209        } else {
210            console.log('No new contacts found on this page');
211        }
212
213        return contacts;
214    }
215
216    // Save contacts to storage (avoiding duplicates)
217    async function saveContacts(newContacts) {
218        const existingContacts = await getAllContacts();
219        
220        // Create a set of existing contact keys for duplicate detection
221        const existingKeys = new Set(
222            existingContacts.map(c => `${c.email}|${c.phone}|${c.name}`.toLowerCase())
223        );
224
225        // Filter out duplicates
226        const uniqueContacts = newContacts.filter(contact => {
227            const key = `${contact.email}|${contact.phone}|${contact.name}`.toLowerCase();
228            return !existingKeys.has(key);
229        });
230
231        if (uniqueContacts.length === 0) {
232            console.log('No new unique contacts to save');
233            return;
234        }
235
236        // Add unique contacts to storage
237        for (const contact of uniqueContacts) {
238            const contactId = `contact_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
239            await GM.setValue(contactId, JSON.stringify(contact));
240        }
241
242        console.log(`Saved ${uniqueContacts.length} unique contacts`);
243    }
244
245    // Get all contacts from storage
246    async function getAllContacts() {
247        const keys = await GM.listValues();
248        const contactKeys = keys.filter(k => k.startsWith('contact_'));
249        
250        const contacts = [];
251        for (const key of contactKeys) {
252            const contactJson = await GM.getValue(key);
253            if (contactJson) {
254                try {
255                    contacts.push(JSON.parse(contactJson));
256                } catch (e) {
257                    console.error('Error parsing contact:', e);
258                }
259            }
260        }
261
262        return contacts;
263    }
264
265    // Clear all contacts
266    async function clearAllContacts() {
267        const keys = await GM.listValues();
268        const contactKeys = keys.filter(k => k.startsWith('contact_'));
269        
270        for (const key of contactKeys) {
271            await GM.deleteValue(key);
272        }
273        
274        console.log(`Cleared ${contactKeys.length} contacts`);
275    }
276
277    // Export to CSV
278    function exportToCSV(contacts) {
279        const headers = ['Name', 'Email', 'Phone', 'Source Domain', 'Page URL', 'Page Title', 'Date Collected'];
280        const rows = contacts.map(c => [
281            c.name,
282            c.email,
283            c.phone,
284            c.sourceDomain,
285            c.pageUrl,
286            c.pageTitle,
287            c.dateCollected
288        ]);
289
290        const csvContent = [
291            headers.join(','),
292            ...rows.map(row => row.map(cell => `"${cell}"`).join(','))
293        ].join('\n');
294
295        const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
296        const link = document.createElement('a');
297        const url = URL.createObjectURL(blob);
298        
299        link.setAttribute('href', url);
300        link.setAttribute('download', `contacts_${Date.now()}.csv`);
301        link.style.visibility = 'hidden';
302        document.body.appendChild(link);
303        link.click();
304        document.body.removeChild(link);
305    }
306
307    // Export to Excel
308    function exportToExcel(contacts) {
309        const data = contacts.map(c => ({
310            'Name': c.name,
311            'Email': c.email,
312            'Phone': c.phone,
313            'Source Domain': c.sourceDomain,
314            'Page URL': c.pageUrl,
315            'Page Title': c.pageTitle,
316            'Date Collected': c.dateCollected
317        }));
318
319        const worksheet = XLSX.utils.json_to_sheet(data);
320        const workbook = XLSX.utils.book_new();
321        XLSX.utils.book_append_sheet(workbook, worksheet, 'Contacts');
322
323        // Auto-size columns
324        const maxWidth = 50;
325        const colWidths = [
326            { wch: 20 }, // Name
327            { wch: 30 }, // Email
328            { wch: 20 }, // Phone
329            { wch: 25 }, // Source Domain
330            { wch: maxWidth }, // Page URL
331            { wch: 30 }, // Page Title
332            { wch: 20 }  // Date Collected
333        ];
334        worksheet['!cols'] = colWidths;
335
336        XLSX.writeFile(workbook, `contacts_${Date.now()}.xlsx`);
337    }
338
339    // Create popup UI
340    function createPopup() {
341        // Remove existing popup if any
342        const existingPopup = document.getElementById('contact-scraper-popup');
343        if (existingPopup) {
344            existingPopup.remove();
345            return;
346        }
347
348        const popup = document.createElement('div');
349        popup.id = 'contact-scraper-popup';
350        popup.innerHTML = `
351            <div class="cs-header">
352                <h3>📇 Contact Scraper</h3>
353                <button class="cs-close" title="Close"></button>
354            </div>
355            <div class="cs-stats">
356                <span id="cs-count">Loading...</span>
357            </div>
358            <div class="cs-actions">
359                <button class="cs-btn cs-btn-primary" id="cs-export-excel">📊 Export Excel</button>
360                <button class="cs-btn cs-btn-primary" id="cs-export-csv">📄 Export CSV</button>
361                <button class="cs-btn cs-btn-secondary" id="cs-refresh">🔄 Refresh</button>
362                <button class="cs-btn cs-btn-danger" id="cs-clear">🗑️ Clear All</button>
363            </div>
364            <div class="cs-contacts" id="cs-contacts-list">
365                <div class="cs-loading">Loading contacts...</div>
366            </div>
367        `;
368
369        document.body.appendChild(popup);
370
371        // Add styles
372        const style = document.createElement('style');
373        style.textContent = `
374            #contact-scraper-popup {
375                position: fixed;
376                top: 50%;
377                left: 50%;
378                transform: translate(-50%, -50%);
379                width: 700px;
380                max-width: 90vw;
381                max-height: 80vh;
382                background: white;
383                border-radius: 12px;
384                box-shadow: 0 10px 40px rgba(0,0,0,0.3);
385                z-index: 999999;
386                font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
387                display: flex;
388                flex-direction: column;
389            }
390
391            .cs-header {
392                display: flex;
393                justify-content: space-between;
394                align-items: center;
395                padding: 20px;
396                border-bottom: 2px solid #e0e0e0;
397                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
398                color: white;
399                border-radius: 12px 12px 0 0;
400            }
401
402            .cs-header h3 {
403                margin: 0;
404                font-size: 20px;
405                font-weight: 600;
406            }
407
408            .cs-close {
409                background: rgba(255,255,255,0.2);
410                border: none;
411                color: white;
412                font-size: 24px;
413                cursor: pointer;
414                width: 32px;
415                height: 32px;
416                border-radius: 50%;
417                display: flex;
418                align-items: center;
419                justify-content: center;
420                transition: background 0.2s;
421            }
422
423            .cs-close:hover {
424                background: rgba(255,255,255,0.3);
425            }
426
427            .cs-stats {
428                padding: 15px 20px;
429                background: #f8f9fa;
430                border-bottom: 1px solid #e0e0e0;
431                font-size: 14px;
432                color: #333;
433                font-weight: 500;
434            }
435
436            .cs-actions {
437                display: flex;
438                gap: 10px;
439                padding: 15px 20px;
440                border-bottom: 1px solid #e0e0e0;
441                flex-wrap: wrap;
442            }
443
444            .cs-btn {
445                padding: 8px 16px;
446                border: none;
447                border-radius: 6px;
448                cursor: pointer;
449                font-size: 13px;
450                font-weight: 500;
451                transition: all 0.2s;
452                flex: 1;
453                min-width: 120px;
454            }
455
456            .cs-btn-primary {
457                background: #667eea;
458                color: white;
459            }
460
461            .cs-btn-primary:hover {
462                background: #5568d3;
463                transform: translateY(-1px);
464            }
465
466            .cs-btn-secondary {
467                background: #6c757d;
468                color: white;
469            }
470
471            .cs-btn-secondary:hover {
472                background: #5a6268;
473                transform: translateY(-1px);
474            }
475
476            .cs-btn-danger {
477                background: #dc3545;
478                color: white;
479            }
480
481            .cs-btn-danger:hover {
482                background: #c82333;
483                transform: translateY(-1px);
484            }
485
486            .cs-contacts {
487                overflow-y: auto;
488                padding: 20px;
489                flex: 1;
490                min-height: 200px;
491                max-height: 400px;
492            }
493
494            .cs-loading {
495                text-align: center;
496                padding: 40px;
497                color: #666;
498            }
499
500            .cs-contact-item {
501                background: #f8f9fa;
502                border: 1px solid #e0e0e0;
503                border-radius: 8px;
504                padding: 15px;
505                margin-bottom: 12px;
506                transition: all 0.2s;
507            }
508
509            .cs-contact-item:hover {
510                box-shadow: 0 2px 8px rgba(0,0,0,0.1);
511                transform: translateY(-1px);
512            }
513
514            .cs-contact-name {
515                font-weight: 600;
516                font-size: 16px;
517                color: #333;
518                margin-bottom: 8px;
519            }
520
521            .cs-contact-detail {
522                font-size: 13px;
523                color: #666;
524                margin: 4px 0;
525                display: flex;
526                align-items: center;
527                gap: 8px;
528            }
529
530            .cs-contact-detail strong {
531                color: #333;
532                min-width: 100px;
533            }
534
535            .cs-contact-link {
536                color: #667eea;
537                text-decoration: none;
538                word-break: break-all;
539            }
540
541            .cs-contact-link:hover {
542                text-decoration: underline;
543            }
544
545            .cs-empty {
546                text-align: center;
547                padding: 40px;
548                color: #999;
549            }
550
551            .cs-empty-icon {
552                font-size: 48px;
553                margin-bottom: 10px;
554            }
555        `;
556        document.head.appendChild(style);
557
558        // Load and display contacts
559        loadContactsToPopup();
560
561        // Event listeners
562        popup.querySelector('.cs-close').addEventListener('click', () => {
563            popup.remove();
564        });
565
566        popup.querySelector('#cs-export-excel').addEventListener('click', async () => {
567            const contacts = await getAllContacts();
568            if (contacts.length > 0) {
569                exportToExcel(contacts);
570            } else {
571                alert('No contacts to export');
572            }
573        });
574
575        popup.querySelector('#cs-export-csv').addEventListener('click', async () => {
576            const contacts = await getAllContacts();
577            if (contacts.length > 0) {
578                exportToCSV(contacts);
579            } else {
580                alert('No contacts to export');
581            }
582        });
583
584        popup.querySelector('#cs-refresh').addEventListener('click', () => {
585            loadContactsToPopup();
586        });
587
588        popup.querySelector('#cs-clear').addEventListener('click', async () => {
589            if (confirm('Are you sure you want to clear all contacts? This cannot be undone.')) {
590                await clearAllContacts();
591                loadContactsToPopup();
592            }
593        });
594
595        // Close on outside click
596        popup.addEventListener('click', (e) => {
597            if (e.target === popup) {
598                popup.remove();
599            }
600        });
601    }
602
603    // Load contacts into popup
604    async function loadContactsToPopup() {
605        const contactsList = document.getElementById('cs-contacts-list');
606        const countElement = document.getElementById('cs-count');
607
608        if (!contactsList || !countElement) return;
609
610        contactsList.innerHTML = '<div class="cs-loading">Loading contacts...</div>';
611
612        const contacts = await getAllContacts();
613        
614        countElement.textContent = `Total Contacts: ${contacts.length}`;
615
616        if (contacts.length === 0) {
617            contactsList.innerHTML = `
618                <div class="cs-empty">
619                    <div class="cs-empty-icon">📭</div>
620                    <div>No contacts collected yet</div>
621                    <div style="margin-top: 10px; font-size: 12px;">Browse websites to automatically collect contacts</div>
622                </div>
623            `;
624            return;
625        }
626
627        // Sort by date (newest first)
628        contacts.sort((a, b) => new Date(b.dateCollected) - new Date(a.dateCollected));
629
630        contactsList.innerHTML = contacts.map(contact => `
631            <div class="cs-contact-item">
632                <div class="cs-contact-name">${contact.name}</div>
633                ${contact.email ? `<div class="cs-contact-detail"><strong>📧 Email:</strong> <span>${contact.email}</span></div>` : ''}
634                ${contact.phone ? `<div class="cs-contact-detail"><strong>📱 Phone:</strong> <span>${contact.phone}</span></div>` : ''}
635                <div class="cs-contact-detail"><strong>🌐 Domain:</strong> <span>${contact.sourceDomain}</span></div>
636                <div class="cs-contact-detail"><strong>📄 Page:</strong> <a href="${contact.pageUrl}" class="cs-contact-link" target="_blank" title="${contact.pageTitle}">${contact.pageTitle || contact.pageUrl}</a></div>
637                <div class="cs-contact-detail"><strong>📅 Collected:</strong> <span>${new Date(contact.dateCollected).toLocaleString()}</span></div>
638            </div>
639        `).join('');
640    }
641
642    // Add floating button to toggle popup
643    function createFloatingButton() {
644        const button = document.createElement('button');
645        button.id = 'contact-scraper-btn';
646        button.innerHTML = '📇';
647        button.title = 'Contact Scraper';
648        
649        const style = document.createElement('style');
650        style.textContent = `
651            #contact-scraper-btn {
652                position: fixed;
653                bottom: 20px;
654                right: 20px;
655                width: 56px;
656                height: 56px;
657                border-radius: 50%;
658                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
659                color: white;
660                border: none;
661                font-size: 24px;
662                cursor: pointer;
663                box-shadow: 0 4px 12px rgba(0,0,0,0.3);
664                z-index: 999998;
665                transition: all 0.3s;
666                display: flex;
667                align-items: center;
668                justify-content: center;
669            }
670
671            #contact-scraper-btn:hover {
672                transform: scale(1.1);
673                box-shadow: 0 6px 16px rgba(0,0,0,0.4);
674            }
675
676            #contact-scraper-btn:active {
677                transform: scale(0.95);
678            }
679        `;
680        
681        document.head.appendChild(style);
682        document.body.appendChild(button);
683
684        button.addEventListener('click', createPopup);
685    }
686
687    // Initialize
688    async function init() {
689        console.log('Contact Scraper Extension Initialized');
690
691        // Wait for page to be fully loaded
692        if (document.readyState === 'loading') {
693            document.addEventListener('DOMContentLoaded', () => {
694                setTimeout(init, 1000);
695            });
696            return;
697        }
698
699        // Create floating button
700        createFloatingButton();
701
702        // Scrape current page after a delay
703        setTimeout(() => {
704            scrapeContacts();
705        }, 2000);
706
707        // Listen for page changes (for SPAs)
708        let lastUrl = window.location.href;
709        const checkUrlChange = debounce(() => {
710            if (window.location.href !== lastUrl) {
711                lastUrl = window.location.href;
712                console.log('URL changed, scraping new page...');
713                setTimeout(() => {
714                    scrapeContacts();
715                }, 2000);
716            }
717        }, 500);
718
719        // Monitor URL changes
720        setInterval(checkUrlChange, 1000);
721
722        // Also listen to popstate and pushstate
723        window.addEventListener('popstate', () => {
724            setTimeout(() => {
725                scrapeContacts();
726            }, 2000);
727        });
728
729        // Intercept pushState and replaceState
730        const originalPushState = history.pushState;
731        const originalReplaceState = history.replaceState;
732
733        history.pushState = function() {
734            originalPushState.apply(this, arguments);
735            setTimeout(() => {
736                scrapeContacts();
737            }, 2000);
738        };
739
740        history.replaceState = function() {
741            originalReplaceState.apply(this, arguments);
742            setTimeout(() => {
743                scrapeContacts();
744            }, 2000);
745        };
746
747        console.log('Contact scraper is now monitoring page changes');
748    }
749
750    // Start the extension
751    init();
752})();