Automatically scrapes email addresses and phone numbers from webpages with smart detection of associated names. Export to Excel/CSV.
Size
24.2 KB
Version
1.0.1
Created
Mar 22, 2026
Updated
25 days ago
// ==UserScript==
// @name Contact Scraper - Auto Extract Emails & Phone Numbers
// @description Automatically scrapes email addresses and phone numbers from webpages with smart detection of associated names. Export to Excel/CSV.
// @version 1.0.1
// @match *://*/*
// @icon https://robomonkey.io/favicon.ico
// @grant GM.getValue
// @grant GM.setValue
// @grant GM.listValues
// @grant GM.deleteValue
// @require https://cdnjs.cloudflare.com/ajax/libs/xlsx/0.18.5/xlsx.full.min.js
// ==/UserScript==
12(function() {
13 'use strict';
14
/**
 * Build a debounced wrapper around `func`.
 *
 * Every call to the wrapper cancels the previously scheduled invocation and
 * schedules a new one, so `func` runs once, `wait` milliseconds after the
 * last call of a burst, with the arguments of that last call.
 *
 * @param {Function} func - Callback to invoke once the calls settle.
 * @param {number} wait - Quiet period in milliseconds.
 * @returns {Function} The debounced wrapper; it returns nothing.
 */
function debounce(func, wait) {
    let pendingTimer;

    return function executedFunction(...callArgs) {
        // Drop whatever was scheduled by an earlier call in the same burst.
        clearTimeout(pendingTimer);

        pendingTimer = setTimeout(() => {
            clearTimeout(pendingTimer);
            func(...callArgs);
        }, wait);
    };
}
27
/**
 * Extract the hostname of a URL, without a leading "www." label.
 *
 * @param {string} url - Absolute URL to parse.
 * @returns {string} The hostname (as normalised by `URL`), or '' when `url`
 *     cannot be parsed.
 */
function getDomain(url) {
    try {
        // Anchor the pattern: only a *leading* "www." is a prefix to strip.
        // An unanchored replace would also mangle hosts such as
        // "mywww.example.com".
        return new URL(url).hostname.replace(/^www\./, '');
    } catch (e) {
        // Unparseable input is an expected case here; callers treat '' as
        // "no domain".
        return '';
    }
}
37
/**
 * Domain of the page the script is currently running on.
 *
 * @returns {string} Result of `getDomain` applied to `window.location.href`.
 */
function getCurrentDomain() {
    const { href } = window.location;
    return getDomain(href);
}
42
/**
 * Find every e-mail-looking token in a piece of text.
 *
 * @param {string} text - Text to scan.
 * @returns {string[]} All matches in order of appearance (duplicates are
 *     kept); an empty array when there is no match or `text` is not a
 *     non-empty string.
 */
function extractEmails(text) {
    if (typeof text !== 'string' || text === '') {
        return [];
    }

    // The TLD class is [A-Za-z]; a "|" inside a character class is a literal
    // pipe, which would glue "a@b.com|c@d.org" into one bogus address.
    const emailRegex = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g;
    return text.match(emailRegex) || [];
}
48
/**
 * Find phone-number-looking tokens in a piece of text.
 *
 * Several overlapping patterns are tried, so the same number is usually hit
 * more than once (sometimes as a slightly different substring, e.g.
 * "555) 123-4567" and "(555) 123-4567"). Matches are therefore de-duplicated
 * by their digit sequence, keeping the longest textual form.
 *
 * The patterns are heuristic: other digit groups with 7-15 digits are
 * accepted as well.
 *
 * @param {string} text - Text to scan.
 * @returns {string[]} Unique candidates (7-15 digits each), ordered by the
 *     first time their digit sequence was seen; empty when nothing matches
 *     or `text` is not a non-empty string.
 */
function extractPhones(text) {
    if (typeof text !== 'string' || text === '') {
        return [];
    }

    const phonePatterns = [
        /\+?\d{1,4}?[-.\s]?\(?\d{1,3}?\)?[-.\s]?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,9}/g,
        /\(\d{3}\)\s*\d{3}[-.\s]?\d{4}/g,
        /\d{3}[-.\s]\d{3}[-.\s]\d{4}/g,
        /\d{10,}/g
    ];

    // digits-only key -> best textual form seen so far.
    // A Map keeps first-insertion order even when a value is replaced.
    const bestByDigits = new Map();

    for (const pattern of phonePatterns) {
        for (const rawMatch of text.match(pattern) || []) {
            const candidate = rawMatch.trim();
            const digits = candidate.replace(/\D/g, '');

            // A valid phone number should have 7-15 digits.
            if (digits.length < 7 || digits.length > 15) {
                continue;
            }

            const known = bestByDigits.get(digits);
            if (known === undefined || candidate.length > known.length) {
                bestByDigits.set(digits, candidate);
            }
        }
    }

    return Array.from(bestByDigits.values());
}
74
/**
 * Heuristically look for a person's name near a contact detail.
 *
 * Starting at `element` and walking up through at most five elements
 * (`element` plus up to four ancestors), each element is checked for:
 *   1. a labelled name ("Name: Jane Doe", also "Contact:", "Person:",
 *      "Author:", "By:" - label matched case-insensitively),
 *   2. two capitalised words directly followed by "@", a digit, "+" or "(",
 *   3. two capitalised words at the start of a line,
 *   4. a descendant whose class contains "name", "author" or "contact" and
 *      whose whole text is made of capitalised words.
 * The first hit wins.
 *
 * @param {?Element} element - Element that contains the e-mail / phone text.
 *     Only `textContent`, `querySelector` and `parentElement` are used.
 * @returns {string} The detected name, or '' when nothing plausible is found.
 */
function findAssociatedName(element) {
    if (!element) return '';

    const maxAttempts = 5;

    // Only the label is case-insensitive. The name itself must keep its
    // capitalisation requirement, otherwise "contact: please call us" would
    // produce the "name" "please call us". JavaScript has no portable
    // per-group flag, so the label and the name are matched by two regexes:
    // the sticky one continues exactly where the label ended.
    const labelPattern = /(?:Name|Contact|Person|Author|By):\s*/gi;
    const capitalizedNameAt = /[A-Z][a-z]+(?:\s+[A-Z][a-z]+)+/y;
    const unlabeledPatterns = [
        /([A-Z][a-z]+\s+[A-Z][a-z]+)(?=\s*[@\d\+\(])/,
        /^([A-Z][a-z]+\s+[A-Z][a-z]+)/m
    ];

    // 2-50 characters, letters and whitespace only.
    const isPlausibleName = (name) =>
        name.length >= 2 && name.length <= 50 && /^[A-Za-z\s]+$/.test(name);

    let searchElement = element;
    for (let attempts = 0; searchElement && attempts < maxAttempts; attempts++) {
        const text = searchElement.textContent || '';
        const candidates = [];

        // 1. First label that is actually followed by a capitalised name.
        labelPattern.lastIndex = 0;
        let label = labelPattern.exec(text);
        while (label !== null) {
            capitalizedNameAt.lastIndex = labelPattern.lastIndex;
            const labelled = capitalizedNameAt.exec(text);
            if (labelled) {
                candidates.push(labelled[0]);
                break;
            }
            label = labelPattern.exec(text);
        }

        // 2. and 3. Unlabelled patterns, in priority order.
        for (const pattern of unlabeledPatterns) {
            const match = text.match(pattern);
            if (match && match[1]) {
                candidates.push(match[1]);
            }
        }

        for (const candidate of candidates) {
            const name = candidate.trim();
            if (isPlausibleName(name)) {
                return name;
            }
        }

        // 4. Dedicated "name"-like element inside the current element.
        const nameElement = searchElement.querySelector('[class*="name"], [class*="author"], [class*="contact"]');
        if (nameElement) {
            const name = (nameElement.textContent || '').trim();
            if (name.length >= 2 && name.length <= 50 && /^[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*$/.test(name)) {
                return name;
            }
        }

        searchElement = searchElement.parentElement;
    }

    return '';
}
121
/**
 * Scrape e-mail addresses and phone numbers from the current page, try to
 * attach a name to each of them, and hand the result to `saveContacts`.
 *
 * E-mail addresses whose domain equals the page's own domain are skipped.
 *
 * @returns {Promise<Object[]>} The contacts found on this page (before the
 *     storage-level duplicate filter applied by `saveContacts`). Each contact
 *     has `name`, `email`, `phone`, `sourceDomain`, `pageUrl`, `pageTitle`
 *     and `dateCollected` (ISO timestamp).
 */
async function scrapeContacts() {
    console.log('Starting contact scraping...');

    const currentDomain = getCurrentDomain();
    const pageUrl = window.location.href;
    const pageTitle = document.title;
    const timestamp = new Date().toISOString();

    // Some documents have no <body>.
    if (!document.body) {
        console.log('No new contacts found on this page');
        return [];
    }

    const bodyText = document.body.innerText || '';

    const emails = extractEmails(bodyText);
    const phones = extractPhones(bodyText);

    console.log(`Found ${emails.length} emails and ${phones.length} phones`);

    // Collect the candidate elements once instead of once per contact.
    // Only elements with fewer than 5 children are considered hosts.
    const candidateElements = Array.from(document.querySelectorAll('*'))
        .filter(el => el.children.length < 5);

    // First candidate (document order) whose text contains `needle`.
    const findNameFor = (needle) => {
        const host = candidateElements.find(el => el.textContent.includes(needle));
        return host ? findAssociatedName(host) : '';
    };

    const contacts = [];
    const processedContacts = new Set();

    // Record one contact unless the same value/name pair was already seen.
    // Keys are lower-cased, like the keys used when saving.
    const addContact = (value, name, fields) => {
        const contactKey = `${value}|${name}`.toLowerCase();
        if (processedContacts.has(contactKey)) {
            return;
        }
        processedContacts.add(contactKey);
        contacts.push({
            name: name || 'Unknown',
            email: fields.email,
            phone: fields.phone,
            sourceDomain: currentDomain,
            pageUrl: pageUrl,
            pageTitle: pageTitle,
            dateCollected: timestamp
        });
    };

    // Process emails
    for (const email of emails) {
        // Host names are case-insensitive; currentDomain is already lower
        // case because it comes from URL.hostname.
        const emailDomain = (email.split('@')[1] || '').toLowerCase();

        // Skip if email belongs to current domain
        if (emailDomain === currentDomain) {
            console.log(`Skipping email from current domain: ${email}`);
            continue;
        }

        addContact(email, findNameFor(email), { email: email, phone: '' });
    }

    // Process phones
    for (const phone of phones) {
        addContact(phone, findNameFor(phone), { email: '', phone: phone });
    }

    // Save contacts to storage
    if (contacts.length > 0) {
        await saveContacts(contacts);
        // saveContacts may still drop entries that are already stored, so
        // this is the number submitted, not necessarily the number written.
        console.log(`Submitted ${contacts.length} contacts for saving`);
    } else {
        console.log('No new contacts found on this page');
    }

    return contacts;
}
215
/**
 * Persist contacts that are not stored yet.
 *
 * Two contacts are considered the same when their e-mail, phone and name are
 * equal ignoring case. The check covers both the contacts already in storage
 * and earlier entries of `newContacts` itself.
 *
 * @param {Object[]} newContacts - Contacts to store; each needs `email`,
 *     `phone` and `name`.
 * @returns {Promise<void>} Resolves once every new contact has been written
 *     under its own `contact_*` key.
 */
async function saveContacts(newContacts) {
    const existingContacts = await getAllContacts();

    const keyOf = (contact) => `${contact.email}|${contact.phone}|${contact.name}`.toLowerCase();
    const seenKeys = new Set(existingContacts.map(keyOf));

    const uniqueContacts = [];
    for (const contact of newContacts) {
        const key = keyOf(contact);
        if (seenKeys.has(key)) {
            continue;
        }
        // Remember the key right away so a later duplicate inside the same
        // batch is rejected too.
        seenKeys.add(key);
        uniqueContacts.push(contact);
    }

    if (uniqueContacts.length === 0) {
        console.log('No new unique contacts to save');
        return;
    }

    // The writes are independent, so issue them together.
    await Promise.all(uniqueContacts.map((contact) => {
        const contactId = `contact_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
        return GM.setValue(contactId, JSON.stringify(contact));
    }));

    console.log(`Saved ${uniqueContacts.length} unique contacts`);
}
244
/**
 * Load every stored contact.
 *
 * Contacts live under storage keys starting with "contact_", one JSON string
 * per key. Entries that are empty, not valid JSON, or not a JSON object are
 * skipped (parse failures are logged).
 *
 * @returns {Promise<Object[]>} The parsed contacts, in the order of the keys
 *     returned by `GM.listValues()`.
 */
async function getAllContacts() {
    const keys = await GM.listValues();
    const contactKeys = keys.filter(k => k.startsWith('contact_'));

    // The reads are independent: fetch them together. Promise.all keeps the
    // results in key order.
    const storedValues = await Promise.all(contactKeys.map(key => GM.getValue(key)));

    const contacts = [];
    for (const contactJson of storedValues) {
        if (!contactJson) {
            continue;
        }
        try {
            const parsed = JSON.parse(contactJson);
            // "null", numbers, strings... are valid JSON but not contacts;
            // letting them through would crash every consumer that reads
            // contact.email etc.
            if (parsed !== null && typeof parsed === 'object') {
                contacts.push(parsed);
            }
        } catch (e) {
            console.error('Error parsing contact:', e);
        }
    }

    return contacts;
}
264
/**
 * Delete every stored contact (all storage keys starting with "contact_").
 *
 * Requires the `GM.deleteValue` API, i.e. a `// @grant GM.deleteValue` line
 * in the userscript header.
 *
 * @returns {Promise<void>} Resolves once all contact keys are deleted.
 * @throws {Error} When `GM.deleteValue` is not available.
 */
async function clearAllContacts() {
    // Fail with an actionable message instead of an opaque
    // "GM.deleteValue is not a function".
    if (typeof GM.deleteValue !== 'function') {
        throw new Error('GM.deleteValue is unavailable; add "// @grant GM.deleteValue" to the userscript header');
    }

    const keys = await GM.listValues();
    const contactKeys = keys.filter(k => k.startsWith('contact_'));

    // The deletions are independent, so issue them together.
    await Promise.all(contactKeys.map(key => GM.deleteValue(key)));

    console.log(`Cleared ${contactKeys.length} contacts`);
}
276
/**
 * Download the given contacts as a CSV file named `contacts_<timestamp>.csv`.
 *
 * Every cell is wrapped in double quotes; embedded double quotes are doubled
 * (RFC 4180) and missing values become empty cells.
 *
 * NOTE(review): cells are not neutralised against spreadsheet formula
 * injection (values starting with "=", "+", "-" or "@"). Phone numbers
 * legitimately start with "+", so this needs a product decision.
 *
 * @param {Object[]} contacts - Contacts with `name`, `email`, `phone`,
 *     `sourceDomain`, `pageUrl`, `pageTitle` and `dateCollected`.
 * @returns {void}
 */
function exportToCSV(contacts) {
    const headers = ['Name', 'Email', 'Phone', 'Source Domain', 'Page URL', 'Page Title', 'Date Collected'];

    // Page titles and names are scraped text and may contain quotes.
    const toCsvCell = (value) => {
        const text = value == null ? '' : String(value);
        return `"${text.replace(/"/g, '""')}"`;
    };

    const rows = contacts.map(c => [
        c.name,
        c.email,
        c.phone,
        c.sourceDomain,
        c.pageUrl,
        c.pageTitle,
        c.dateCollected
    ]);

    const csvContent = [
        headers.join(','),
        ...rows.map(row => row.map(toCsvCell).join(','))
    ].join('\n');

    const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
    const url = URL.createObjectURL(blob);
    const link = document.createElement('a');

    link.setAttribute('href', url);
    link.setAttribute('download', `contacts_${Date.now()}.csv`);
    link.style.visibility = 'hidden';
    document.body.appendChild(link);
    try {
        link.click();
    } finally {
        document.body.removeChild(link);
        // Release the blob; deferred so the download triggered by click()
        // has started before the URL disappears.
        setTimeout(() => URL.revokeObjectURL(url), 0);
    }
}
306
/**
 * Download the given contacts as an .xlsx workbook
 * (`contacts_<timestamp>.xlsx`) with a single "Contacts" sheet.
 *
 * Uses the global `XLSX` object loaded through the userscript's `@require`.
 *
 * @param {Object[]} contacts - Contacts with `name`, `email`, `phone`,
 *     `sourceDomain`, `pageUrl`, `pageTitle` and `dateCollected`.
 * @returns {void}
 */
function exportToExcel(contacts) {
    // One entry per spreadsheet column: header text, contact field, and
    // column width in characters.
    const columns = [
        { header: 'Name', field: 'name', width: 20 },
        { header: 'Email', field: 'email', width: 30 },
        { header: 'Phone', field: 'phone', width: 20 },
        { header: 'Source Domain', field: 'sourceDomain', width: 25 },
        { header: 'Page URL', field: 'pageUrl', width: 50 },
        { header: 'Page Title', field: 'pageTitle', width: 30 },
        { header: 'Date Collected', field: 'dateCollected', width: 20 }
    ];

    const records = contacts.map((contact) => {
        const record = {};
        for (const { header, field } of columns) {
            record[header] = contact[field];
        }
        return record;
    });

    const sheet = XLSX.utils.json_to_sheet(records);
    const book = XLSX.utils.book_new();
    XLSX.utils.book_append_sheet(book, sheet, 'Contacts');

    // Fixed column widths.
    sheet['!cols'] = columns.map(({ width }) => ({ wch: width }));

    XLSX.writeFile(book, `contacts_${Date.now()}.xlsx`);
}
338
/**
 * Toggle the contact-list popup.
 *
 * If the popup is already in the document it is removed and nothing else
 * happens. Otherwise the popup is built, its stylesheet is injected (once
 * per document), the stored contacts are loaded into it and the button
 * handlers (close, export to Excel/CSV, refresh, clear all) are attached.
 *
 * @returns {void}
 */
function createPopup() {
    // Remove existing popup if any (the floating button acts as a toggle)
    const existingPopup = document.getElementById('contact-scraper-popup');
    if (existingPopup) {
        existingPopup.remove();
        return;
    }

    const popup = document.createElement('div');
    popup.id = 'contact-scraper-popup';
    popup.innerHTML = `
        <div class="cs-header">
            <h3>📇 Contact Scraper</h3>
            <button class="cs-close" title="Close">✕</button>
        </div>
        <div class="cs-stats">
            <span id="cs-count">Loading...</span>
        </div>
        <div class="cs-actions">
            <button class="cs-btn cs-btn-primary" id="cs-export-excel">📊 Export Excel</button>
            <button class="cs-btn cs-btn-primary" id="cs-export-csv">📄 Export CSV</button>
            <button class="cs-btn cs-btn-secondary" id="cs-refresh">🔄 Refresh</button>
            <button class="cs-btn cs-btn-danger" id="cs-clear">🗑️ Clear All</button>
        </div>
        <div class="cs-contacts" id="cs-contacts-list">
            <div class="cs-loading">Loading contacts...</div>
        </div>
    `;

    document.body.appendChild(popup);

    // Add styles - only once. The popup is created anew on every open, and
    // appending a fresh <style> each time would pile up identical sheets.
    const styleId = 'contact-scraper-popup-style';
    if (!document.getElementById(styleId)) {
        const style = document.createElement('style');
        style.id = styleId;
        style.textContent = `
            #contact-scraper-popup {
                position: fixed;
                top: 50%;
                left: 50%;
                transform: translate(-50%, -50%);
                width: 700px;
                max-width: 90vw;
                max-height: 80vh;
                background: white;
                border-radius: 12px;
                box-shadow: 0 10px 40px rgba(0,0,0,0.3);
                z-index: 999999;
                font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
                display: flex;
                flex-direction: column;
            }

            .cs-header {
                display: flex;
                justify-content: space-between;
                align-items: center;
                padding: 20px;
                border-bottom: 2px solid #e0e0e0;
                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                color: white;
                border-radius: 12px 12px 0 0;
            }

            .cs-header h3 {
                margin: 0;
                font-size: 20px;
                font-weight: 600;
            }

            .cs-close {
                background: rgba(255,255,255,0.2);
                border: none;
                color: white;
                font-size: 24px;
                cursor: pointer;
                width: 32px;
                height: 32px;
                border-radius: 50%;
                display: flex;
                align-items: center;
                justify-content: center;
                transition: background 0.2s;
            }

            .cs-close:hover {
                background: rgba(255,255,255,0.3);
            }

            .cs-stats {
                padding: 15px 20px;
                background: #f8f9fa;
                border-bottom: 1px solid #e0e0e0;
                font-size: 14px;
                color: #333;
                font-weight: 500;
            }

            .cs-actions {
                display: flex;
                gap: 10px;
                padding: 15px 20px;
                border-bottom: 1px solid #e0e0e0;
                flex-wrap: wrap;
            }

            .cs-btn {
                padding: 8px 16px;
                border: none;
                border-radius: 6px;
                cursor: pointer;
                font-size: 13px;
                font-weight: 500;
                transition: all 0.2s;
                flex: 1;
                min-width: 120px;
            }

            .cs-btn-primary {
                background: #667eea;
                color: white;
            }

            .cs-btn-primary:hover {
                background: #5568d3;
                transform: translateY(-1px);
            }

            .cs-btn-secondary {
                background: #6c757d;
                color: white;
            }

            .cs-btn-secondary:hover {
                background: #5a6268;
                transform: translateY(-1px);
            }

            .cs-btn-danger {
                background: #dc3545;
                color: white;
            }

            .cs-btn-danger:hover {
                background: #c82333;
                transform: translateY(-1px);
            }

            .cs-contacts {
                overflow-y: auto;
                padding: 20px;
                flex: 1;
                min-height: 200px;
                max-height: 400px;
            }

            .cs-loading {
                text-align: center;
                padding: 40px;
                color: #666;
            }

            .cs-contact-item {
                background: #f8f9fa;
                border: 1px solid #e0e0e0;
                border-radius: 8px;
                padding: 15px;
                margin-bottom: 12px;
                transition: all 0.2s;
            }

            .cs-contact-item:hover {
                box-shadow: 0 2px 8px rgba(0,0,0,0.1);
                transform: translateY(-1px);
            }

            .cs-contact-name {
                font-weight: 600;
                font-size: 16px;
                color: #333;
                margin-bottom: 8px;
            }

            .cs-contact-detail {
                font-size: 13px;
                color: #666;
                margin: 4px 0;
                display: flex;
                align-items: center;
                gap: 8px;
            }

            .cs-contact-detail strong {
                color: #333;
                min-width: 100px;
            }

            .cs-contact-link {
                color: #667eea;
                text-decoration: none;
                word-break: break-all;
            }

            .cs-contact-link:hover {
                text-decoration: underline;
            }

            .cs-empty {
                text-align: center;
                padding: 40px;
                color: #999;
            }

            .cs-empty-icon {
                font-size: 48px;
                margin-bottom: 10px;
            }
        `;
        document.head.appendChild(style);
    }

    // (Re)load the list; a storage failure must not become an unhandled
    // promise rejection.
    const refreshList = () => {
        loadContactsToPopup().catch((err) => {
            console.error('Failed to load contacts:', err);
        });
    };

    // Build the click handler shared by both export buttons.
    const exportWith = (exporter) => async () => {
        try {
            const contacts = await getAllContacts();
            if (contacts.length > 0) {
                exporter(contacts);
            } else {
                alert('No contacts to export');
            }
        } catch (err) {
            console.error('Contact export failed:', err);
        }
    };

    // Load and display contacts
    refreshList();

    // Event listeners
    popup.querySelector('.cs-close').addEventListener('click', () => {
        popup.remove();
    });

    popup.querySelector('#cs-export-excel').addEventListener('click', exportWith(exportToExcel));
    popup.querySelector('#cs-export-csv').addEventListener('click', exportWith(exportToCSV));

    popup.querySelector('#cs-refresh').addEventListener('click', refreshList);

    popup.querySelector('#cs-clear').addEventListener('click', async () => {
        if (confirm('Are you sure you want to clear all contacts? This cannot be undone.')) {
            try {
                await clearAllContacts();
            } catch (err) {
                console.error('Failed to clear contacts:', err);
            }
            refreshList();
        }
    });

    // Close on outside click
    // NOTE(review): this listener sits on the popup itself, so it only fires
    // for clicks whose target is the popup element, never for clicks outside
    // it. A real outside-click close needs a backdrop element or a
    // document-level listener - confirm the intended UX before changing it.
    popup.addEventListener('click', (e) => {
        if (e.target === popup) {
            popup.remove();
        }
    });
}
602
/**
 * Render the stored contacts into the popup (newest first) and update the
 * total counter. Does nothing when the popup is not in the document.
 *
 * All contact fields originate from scraped pages and are therefore HTML
 * escaped before being inserted; the page link only keeps http(s) URLs.
 *
 * @returns {Promise<void>} Resolves once the list has been rendered.
 */
async function loadContactsToPopup() {
    const contactsList = document.getElementById('cs-contacts-list');
    const countElement = document.getElementById('cs-count');

    if (!contactsList || !countElement) return;

    // Escape text for use in HTML content and double-quoted attributes.
    const htmlEntities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const escapeHtml = (value) =>
        String(value == null ? '' : value).replace(/[&<>"']/g, (ch) => htmlEntities[ch]);

    // Only http(s) links are clickable; anything else (javascript:, data:,
    // unparseable) is replaced by an inert "#".
    const toSafeHref = (rawUrl) => {
        try {
            const { protocol } = new URL(rawUrl);
            return protocol === 'http:' || protocol === 'https:' ? escapeHtml(rawUrl) : '#';
        } catch (e) {
            return '#';
        }
    };

    contactsList.innerHTML = '<div class="cs-loading">Loading contacts...</div>';

    const contacts = await getAllContacts();

    countElement.textContent = `Total Contacts: ${contacts.length}`;

    if (contacts.length === 0) {
        contactsList.innerHTML = `
            <div class="cs-empty">
                <div class="cs-empty-icon">📭</div>
                <div>No contacts collected yet</div>
                <div style="margin-top: 10px; font-size: 12px;">Browse websites to automatically collect contacts</div>
            </div>
        `;
        return;
    }

    // Sort by date (newest first)
    contacts.sort((a, b) => new Date(b.dateCollected) - new Date(a.dateCollected));

    contactsList.innerHTML = contacts.map(contact => `
        <div class="cs-contact-item">
            <div class="cs-contact-name">${escapeHtml(contact.name)}</div>
            ${contact.email ? `<div class="cs-contact-detail"><strong>📧 Email:</strong> <span>${escapeHtml(contact.email)}</span></div>` : ''}
            ${contact.phone ? `<div class="cs-contact-detail"><strong>📱 Phone:</strong> <span>${escapeHtml(contact.phone)}</span></div>` : ''}
            <div class="cs-contact-detail"><strong>🌐 Domain:</strong> <span>${escapeHtml(contact.sourceDomain)}</span></div>
            <div class="cs-contact-detail"><strong>📄 Page:</strong> <a href="${toSafeHref(contact.pageUrl)}" class="cs-contact-link" target="_blank" rel="noopener noreferrer" title="${escapeHtml(contact.pageTitle)}">${escapeHtml(contact.pageTitle || contact.pageUrl)}</a></div>
            <div class="cs-contact-detail"><strong>📅 Collected:</strong> <span>${escapeHtml(new Date(contact.dateCollected).toLocaleString())}</span></div>
        </div>
    `).join('');
}
641
/**
 * Insert the round floating button (bottom-right corner) together with its
 * stylesheet. Clicking the button calls `createPopup`, which toggles the
 * contact popup.
 *
 * @returns {void}
 */
function createFloatingButton() {
    const buttonCss = `
        #contact-scraper-btn {
            position: fixed;
            bottom: 20px;
            right: 20px;
            width: 56px;
            height: 56px;
            border-radius: 50%;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            border: none;
            font-size: 24px;
            cursor: pointer;
            box-shadow: 0 4px 12px rgba(0,0,0,0.3);
            z-index: 999998;
            transition: all 0.3s;
            display: flex;
            align-items: center;
            justify-content: center;
        }

        #contact-scraper-btn:hover {
            transform: scale(1.1);
            box-shadow: 0 6px 16px rgba(0,0,0,0.4);
        }

        #contact-scraper-btn:active {
            transform: scale(0.95);
        }
    `;

    const toggleButton = Object.assign(document.createElement('button'), {
        id: 'contact-scraper-btn',
        innerHTML: '📇',
        title: 'Contact Scraper'
    });

    const styleSheet = document.createElement('style');
    styleSheet.textContent = buttonCss;

    // Stylesheet first, then the button it styles.
    document.head.appendChild(styleSheet);
    document.body.appendChild(toggleButton);

    toggleButton.addEventListener('click', createPopup);
}
686
/**
 * Start the scraper: wait for the DOM, add the floating button, scrape the
 * current page, and scrape again whenever the page URL changes (history
 * API calls, back/forward navigation, or any other URL change picked up by
 * polling).
 *
 * All triggers go through one debounced scheduler, so a navigation that
 * fires several signals at once results in a single scrape about two
 * seconds after the last signal, and scrapes never run concurrently.
 *
 * @returns {Promise<void>}
 */
async function init() {
    console.log('Contact Scraper Extension Initialized');

    // Wait for page to be fully loaded
    if (document.readyState === 'loading') {
        document.addEventListener('DOMContentLoaded', () => {
            setTimeout(init, 1000);
        }, { once: true });
        return;
    }

    // Create floating button
    createFloatingButton();

    // Run scrapes strictly one after another. Two overlapping scrapes would
    // both read the stored contacts before either has written, and could
    // store the same contact twice. Failures are logged instead of becoming
    // unhandled rejections, and do not block later scrapes.
    let scrapeQueue = Promise.resolve();
    const runScrape = () => {
        scrapeQueue = scrapeQueue
            .then(() => scrapeContacts())
            .catch((err) => {
                console.error('Contact scraping failed:', err);
            });
    };

    // Give the page two seconds to render after the last trigger.
    const scheduleScrape = debounce(runScrape, 2000);

    // Scrape current page after a delay
    scheduleScrape();

    // Listen for page changes (for SPAs): poll the URL as a fallback for
    // navigations the hooks below do not see.
    let lastUrl = window.location.href;
    setInterval(() => {
        if (window.location.href !== lastUrl) {
            lastUrl = window.location.href;
            console.log('URL changed, scraping new page...');
            scheduleScrape();
        }
    }, 1000);

    // Back/forward navigation
    window.addEventListener('popstate', () => {
        scheduleScrape();
    });

    // Intercept pushState and replaceState
    const originalPushState = history.pushState;
    const originalReplaceState = history.replaceState;

    history.pushState = function(...args) {
        const result = originalPushState.apply(this, args);
        scheduleScrape();
        return result;
    };

    history.replaceState = function(...args) {
        const result = originalReplaceState.apply(this, args);
        scheduleScrape();
        return result;
    };

    console.log('Contact scraper is now monitoring page changes');
}
749
750 // Start the extension
751 init();
752})();