Extract Facebook reel comments with improved selector detection and debugging
Size
26.4 KB
Version
2.0.0
Created
Dec 15, 2025
Updated
about 1 month ago
// ==UserScript==
// @name Facebook Comments Exporter Pro - Enhanced
// @description Extract Facebook reel comments with improved selector detection and debugging
// @version 2.0.0
// @match https://*.facebook.com/*
// @icon https://static.xx.fbcdn.net/rsrc.php/y1/r/ay1hV6OlegS.ico
// @require https://cdnjs.cloudflare.com/ajax/libs/xlsx/0.18.5/xlsx.full.min.js
// @grant GM.openInTab
// ==/UserScript==
10
11(function() {
12 'use strict';
13
// Startup marker: confirms in the browser console that the userscript loaded.
console.log('๐ Facebook Comments Exporter Pro v2.0 - Initialized');
15
// ==================== CONFIG ====================
// Tunables for the exporter. In the code visible in this file only
// MAX_SCROLLS, SCROLL_DELAY and COMMENT_MIN_LENGTH are actually read; the
// remaining keys are kept unchanged.
const CONFIG = {
    // Upper bound on scroll iterations while loading comments.
    MAX_SCROLLS: 20,
    // Pause (ms) after each scroll step.
    SCROLL_DELAY: 1500,
    BATCH_SIZE: 3,
    RATE_LIMIT_DELAY: 2000,
    // A text must be strictly longer than this to count as a comment.
    COMMENT_MIN_LENGTH: 2,
    ENABLE_CACHING: true,
    TIMEOUT_MS: 40000,
    AUTO_SAVE_INTERVAL: 30000,
    DEBUG_MODE: true,
    AGGRESSIVE_SCROLL: true, // NEW: More aggressive scrolling
};
29
// ==================== STATE ====================
// One record per processed page: { url, title, comments, commentCount, scrapedAt }.
let exportData = [];
// Re-entrancy guard for the "extract" button.
let isProcessing = false;
// The panel element while it is open, otherwise null.
let currentPanel = null;
// Dedupe keys of comments already exported; emptied by the Clear button.
const seenHashes = new Set();
35
// ==================== UTILITIES ====================
/**
 * Pause helper for async flows.
 *
 * @param {number} ms - Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 */
function wait(ms) {
    return new Promise((done) => {
        setTimeout(done, ms);
    });
}
40
/**
 * Write a prefixed diagnostic line to the console.
 *
 * @param {string} message - Text appended after the exporter prefix.
 * @param {*} [data=null] - Optional payload logged as a second argument.
 *   Falsy payloads such as 0 or false are logged as-is; only null/undefined
 *   are treated as "no payload".
 */
function log(message, data = null) {
    const line = `๐ [FB Exporter] ${message}`;
    if (data == null) {
        console.log(line);
        return;
    }
    console.log(line, data);
}
46
/**
 * Compute a short, non-cryptographic hash of a string.
 *
 * Folds each UTF-16 code unit into a 32-bit accumulator (acc * 31 + unit,
 * wrapped to int32) and returns the absolute value in base 36.
 *
 * @param {string} text - Input string.
 * @returns {string} Base-36 digest; '0' for the empty string.
 */
function createHash(text) {
    let acc = 0;
    for (let idx = 0; idx < text.length; idx += 1) {
        // Math.imul keeps the multiply in 32-bit space; `| 0` wraps the sum.
        acc = (Math.imul(acc, 31) + text.charCodeAt(idx)) | 0;
    }
    return Math.abs(acc).toString(36);
}
56
// ==================== IMPROVED COMMENT EXTRACTION ====================
/**
 * Collect comments from the current page.
 *
 * Steps: click comment buttons that are not marked expanded, scroll to load
 * more content, then try selector strategies in order until one yields
 * comments, ending with a whole-page fallback scan. Results are de-duplicated
 * against `seenHashes`, so comments returned by an earlier run are not
 * returned again until the data is cleared.
 *
 * @returns {Promise<Array<{author: string, text: string, timestamp: string}>>}
 *   Comments not seen before.
 */
async function extractComments() {
    log('Starting comment extraction...');
    const comments = [];
    const foundSelectors = [];

    // Expand comments section if collapsed
    const expandButtons = document.querySelectorAll('button[aria-label*="comment"], button[aria-label*="Comment"]');
    for (const btn of expandButtons) {
        const expanded = btn.getAttribute('aria-expanded');
        if (!expanded || expanded === 'false') {
            log('Expanding comments section...');
            btn.click();
            await wait(2000);
        }
    }

    // Aggressive scrolling to load all comments
    await scrollToLoadComments();

    // Strategy 1 always runs; strategies 2-4 only run while nothing has been
    // collected yet.
    const articles = document.querySelectorAll('div[role="article"]');
    if (articles.length > 0) {
        foundSelectors.push(`role="article" (${articles.length})`);
        comments.push(...extractFromArticles(articles));
    }

    const laterStrategies = [
        {
            selector: '[data-testid*="comment"], [data-testid*="Comment"]',
            label: 'data-testid',
            extract: extractFromDataTestId,
        },
        {
            selector: 'div[dir="auto"][class*="x193iq5w"]',
            label: 'text containers',
            extract: extractFromTextContainers,
        },
        {
            selector: 'a[role="link"][href*="/"][href*="?comment"]',
            label: 'comment URLs',
            extract: extractFromAuthorLinks,
        },
    ];
    for (const { selector, label, extract } of laterStrategies) {
        if (comments.length > 0) break;
        const matches = document.querySelectorAll(selector);
        if (matches.length > 0) {
            foundSelectors.push(`${label} (${matches.length})`);
            comments.push(...extract(matches));
        }
    }

    // Strategy 5: whole-page scan when nothing else produced comments.
    if (comments.length === 0) {
        log('Using fallback extraction method...');
        foundSelectors.push('fallback scan');
        comments.push(...fallbackExtraction());
    }

    // Deduplicate on the exact (author, text) pair. The JSON-encoded pair is
    // used as the key so that a lossy hash collision, or a "|" inside a name
    // or comment, cannot merge two different comments.
    const uniqueComments = [];
    for (const comment of comments) {
        const key = JSON.stringify([comment.author, comment.text]);
        if (!seenHashes.has(key)) {
            seenHashes.add(key);
            uniqueComments.push(comment);
        }
    }

    log(`✅ Extracted ${uniqueComments.length} unique comments (Strategies: ${foundSelectors.join(', ')})`);
    return uniqueComments;
}
129
/**
 * Build comment records from `div[role="article"]` containers.
 *
 * For each container: skip it if it holds the post body, take the first
 * matching link's text as the author, and take the longest usable
 * `div[dir="auto"]` text as the comment. Failures on one container are
 * logged and do not stop the others.
 *
 * @param {Iterable<Element>} elements - Candidate article containers.
 * @returns {Array<{author: string, text: string, timestamp: string}>}
 */
function extractFromArticles(elements) {
    const found = [];
    for (const article of elements) {
        try {
            // The post itself is also an article; it is not a comment.
            if (article.querySelector('[data-testid="post_message"]')) continue;

            const nameLink = article.querySelector('a[role="link"][href*="/"]');
            const author = nameLink?.textContent?.trim() || 'Unknown';
            if (author === 'Unknown') continue;

            // Keep the longest text that is neither a timestamp nor UI chrome.
            let longest = '';
            for (const block of article.querySelectorAll('div[dir="auto"]')) {
                const candidate = block.textContent.trim();
                const usable = Boolean(candidate)
                    && candidate.length > CONFIG.COMMENT_MIN_LENGTH
                    && !isTimestamp(candidate)
                    && !isUIText(candidate);
                if (usable && candidate.length > longest.length) {
                    longest = candidate;
                }
            }

            if (!longest || longest === author) continue;
            found.push({
                author,
                text: longest.substring(0, 500),
                timestamp: new Date().toISOString(),
            });
        } catch (e) {
            log('Error extracting from article:', e.message);
        }
    }
    return found;
}
170
/**
 * Build comment records from elements matched by a comment `data-testid`.
 *
 * The author comes from a descendant whose data-testid contains "author",
 * falling back to the first link with a "/" in its href. The comment text is
 * the element's whole trimmed textContent.
 *
 * @param {Iterable<Element>} elements - Matched elements.
 * @returns {Array<{author: string, text: string, timestamp: string}>}
 */
function extractFromDataTestId(elements) {
    const found = [];
    for (const node of elements) {
        try {
            const taggedName = node.querySelector('[data-testid*="author"]')?.textContent?.trim();
            const author = taggedName
                || node.querySelector('a[href*="/"]')?.textContent?.trim()
                || 'Unknown';
            const body = node.textContent?.trim() || '';

            if (author === 'Unknown' || !body) continue;
            if (!(body.length > CONFIG.COMMENT_MIN_LENGTH) || isUIText(body)) continue;

            found.push({
                author,
                text: body.substring(0, 500),
                timestamp: new Date().toISOString(),
            });
        } catch (e) {
            log('Error extracting from data-testid:', e.message);
        }
    }
    return found;
}
193
/**
 * Build comment records from bare text containers.
 *
 * Each distinct text is emitted at most once per call. The author is taken
 * from the first matching link inside the nearest article / li / listitem
 * ancestor; texts without a resolvable author are dropped.
 *
 * @param {Iterable<Element>} elements - Text container elements.
 * @returns {Array<{author: string, text: string, timestamp: string}>}
 */
function extractFromTextContainers(elements) {
    const found = [];
    const emittedTexts = new Set();

    for (const node of elements) {
        try {
            const body = node.textContent?.trim() || '';
            const eligible = Boolean(body)
                && body.length > CONFIG.COMMENT_MIN_LENGTH
                && !isUIText(body)
                && !emittedTexts.has(body);
            if (!eligible) continue;

            // Look for the author in the closest comment-like ancestor.
            const host = node.closest('div[role="article"]')
                || node.closest('li')
                || node.closest('[role="listitem"]');
            const author = host
                ? (host.querySelector('a[role="link"][href*="/"]')?.textContent?.trim() || 'Unknown')
                : 'Unknown';
            if (author === 'Unknown') continue;

            emittedTexts.add(body);
            found.push({
                author,
                text: body.substring(0, 500),
                timestamp: new Date().toISOString(),
            });
        } catch (e) {
            log('Error extracting from text container:', e.message);
        }
    }
    return found;
}
228
/**
 * Build comment records from author links by reading the text that follows.
 *
 * Starting at each link, walks to the next sibling (or the parent's next
 * sibling) until a text longer than the configured minimum is found or the
 * chain ends.
 *
 * @param {Iterable<Element>} elements - Author link elements.
 * @returns {Array<{author: string, text: string, timestamp: string}>}
 */
function extractFromAuthorLinks(elements) {
    const found = [];
    for (const anchor of elements) {
        try {
            const author = anchor.textContent?.trim() || 'Unknown';
            if (author === 'Unknown') continue;

            let cursor = anchor;
            let body = '';

            while (cursor && body.length < 100) {
                cursor = cursor.nextSibling || cursor.parentElement?.nextSibling;
                if (!cursor) break;

                switch (cursor.nodeType) {
                    case Node.TEXT_NODE:
                        body = cursor.textContent.trim();
                        break;
                    case Node.ELEMENT_NODE:
                        body = cursor.textContent?.trim() || '';
                        break;
                    default:
                        // Other node types leave the current text untouched.
                        break;
                }

                if (body && body.length > CONFIG.COMMENT_MIN_LENGTH) break;
            }

            if (!body || !(body.length > CONFIG.COMMENT_MIN_LENGTH)) continue;
            found.push({
                author,
                text: body.substring(0, 500),
                timestamp: new Date().toISOString(),
            });
        } catch (e) {
            log('Error extracting from author links:', e.message);
        }
    }
    return found;
}
267
/**
 * Last-resort extraction: scan every `div[dir="auto"]` in the document.
 *
 * Keeps texts that pass the length / UI-text / timestamp filters and for
 * which `findAuthorForElement` returns a real name, emitting one record per
 * distinct author+text pair.
 *
 * @returns {Array<{author: string, text: string, timestamp: string}>}
 */
function fallbackExtraction() {
    const byPair = new Map();

    for (const block of document.querySelectorAll('div[dir="auto"]')) {
        const body = block.textContent?.trim() || '';
        const eligible = Boolean(body)
            && body.length > CONFIG.COMMENT_MIN_LENGTH
            && !isUIText(body)
            && !isTimestamp(body);
        if (!eligible) continue;

        const author = findAuthorForElement(block);
        if (!author || author === 'Unknown') continue;

        const pairKey = `${author}|${body}`;
        if (byPair.has(pairKey)) continue;
        byPair.set(pairKey, {
            author,
            text: body.substring(0, 500),
            timestamp: new Date().toISOString(),
        });
    }

    return [...byPair.values()];
}
293
/**
 * Find an author name for an element by searching it and its ancestors.
 *
 * At each level (stopping before document.body) the matching links are
 * checked in document order and the first one with non-empty text wins.
 * Links without text (for example image-only links) are skipped rather than
 * ending the search with an empty name.
 *
 * @param {Element} el - Element to start from.
 * @returns {string} Trimmed link text, or 'Unknown' when none is found.
 */
function findAuthorForElement(el) {
    for (let node = el; node && node !== document.body; node = node.parentElement) {
        for (const link of node.querySelectorAll('a[role="link"][href*="/"]')) {
            const name = link.textContent?.trim();
            if (name) return name;
        }
    }
    return 'Unknown';
}
303
/**
 * Tell whether a text is just a short timestamp label.
 *
 * Matches a number followed by a single unit letter (s, m, h, d, w or y,
 * case-insensitive, optional whitespace between) or a clock-style "12:34".
 * Week and year units are included so labels like "2w" / "1y" are not
 * mistaken for comment text.
 *
 * @param {string} text - Trimmed candidate text.
 * @returns {boolean} True when the whole text is a timestamp label.
 */
function isTimestamp(text) {
    return /^\d+\s*[smhdwy]$/i.test(text) || /^\d+:\d+$/.test(text);
}
307
/**
 * Heuristic: is this short text interface chrome rather than a comment?
 *
 * Only texts shorter than 30 characters are considered. A text counts as UI
 * when it contains one of the action labels, a middle dot separator, or the
 * standalone word "ago". "ago" is matched as a whole word so that ordinary
 * words containing it (e.g. "Chicago") are not discarded.
 *
 * @param {string} text - Trimmed candidate text.
 * @returns {boolean} True when the text looks like UI chrome.
 */
function isUIText(text) {
    if (!(text.length < 30)) return false;
    // '\u00B7' is the middle dot used as a separator between action labels.
    const uiLabels = ['Like', 'Reply', 'Share', 'Comment', 'Edit', 'Delete', 'React', 'More', '\u00B7'];
    return uiLabels.some((label) => text.includes(label)) || /\bago\b/.test(text);
}
312
// ==================== SCROLL FUNCTION ====================
/**
 * Scroll the page (and a comment container, if one is found) to trigger
 * lazy loading.
 *
 * Stops after CONFIG.MAX_SCROLLS iterations, or once the document height has
 * been unchanged for three consecutive iterations. Each iteration scrolls the
 * window by 1200px, waits CONFIG.SCROLL_DELAY ms, then scrolls the first
 * `[role="feed"]` or `.comments` element to its bottom.
 *
 * @returns {Promise<void>}
 */
async function scrollToLoadComments() {
    log('Starting aggressive scroll...');
    let previousHeight = 0;
    let scrollAttempts = 0;
    let noChangeCount = 0;

    while (scrollAttempts < CONFIG.MAX_SCROLLS && noChangeCount < 3) {
        // Height is sampled before scrolling and compared with the sample
        // taken at the start of the previous iteration.
        const currentHeight = document.documentElement.scrollHeight;

        if (currentHeight === previousHeight) {
            noChangeCount++;
            log(`No new content (${noChangeCount}/3)`);
        } else {
            noChangeCount = 0;
        }

        window.scrollBy(0, 1200);
        await wait(CONFIG.SCROLL_DELAY);

        const commentSection = document.querySelector('[role="feed"]') || document.querySelector('.comments');
        if (commentSection) {
            commentSection.scrollTop = commentSection.scrollHeight;
        }

        previousHeight = currentHeight;
        scrollAttempts++;
        log(`Scroll ${scrollAttempts}/${CONFIG.MAX_SCROLLS} (height: ${currentHeight})`);
    }

    log(`✅ Scrolling complete after ${scrollAttempts} attempts`);
}
347
// ==================== VIDEO TITLE EXTRACTION ====================
/**
 * Work out a title for the current page.
 *
 * Tries, in order: the og:title meta tag, the first h1, the first h2 inside
 * [role="main"], the first 100 characters of the post message, and finally a
 * generic label based on whether the URL contains "reel". The first result
 * longer than five characters is returned; a probe that throws is skipped.
 *
 * @returns {Promise<string>} The chosen title, or 'Title not found'.
 */
async function extractVideoTitle() {
    log('Extracting video title...');

    const textOf = (selector) => document.querySelector(selector)?.textContent?.trim();
    const probes = [
        () => document.querySelector('meta[property="og:title"]')?.getAttribute('content'),
        () => textOf('h1'),
        () => textOf('[role="main"] h2'),
        () => textOf('[data-testid="post_message"]')?.substring(0, 100),
        () => (window.location.href.includes('reel') ? 'Facebook Reel' : 'Facebook Post'),
    ];

    for (const probe of probes) {
        try {
            const candidate = probe();
            if (!candidate || !(candidate.length > 5)) continue;
            log(`Found title: ${candidate.substring(0, 50)}...`);
            return candidate;
        } catch (e) {
            // Continue to next strategy
        }
    }

    return 'Title not found';
}
375
// ==================== PROCESS FUNCTIONS ====================
/**
 * Extract the title and comments of the current page and append the result
 * to `exportData`.
 *
 * @param {(message: string) => void} updateStatus - Receives progress and
 *   result messages for display.
 * @returns {Promise<object|null>} The stored record
 *   ({ url, title, comments, commentCount, scrapedAt }), or null when
 *   extraction failed (the error is logged and reported via updateStatus).
 */
async function processCurrentPage(updateStatus) {
    log('Processing current page...');
    updateStatus('๐ Extracting data from current page...');

    try {
        const title = await extractVideoTitle();
        const comments = await extractComments();

        const videoData = {
            url: window.location.href,
            title,
            comments,
            commentCount: comments.length,
            scrapedAt: new Date().toISOString(),
        };

        exportData.push(videoData);
        updateStatus(`✅ Extracted: "${title.substring(0, 40)}..." (${comments.length} comments)`);
        return videoData;
    } catch (error) {
        log('Error processing current page:', error);
        updateStatus(`โ Error: ${error.message}`);
        return null;
    }
}
403
// ==================== EXPORT FUNCTIONS ====================
/**
 * Download all collected comments as a CSV file, one row per comment.
 *
 * Every field (including the URL) is wrapped in double quotes with embedded
 * quotes doubled, so commas, quotes and line breaks inside values stay
 * within their cell.
 */
function exportToCSV() {
    const quote = (value) => `"${String(value ?? '').replace(/"/g, '""')}"`;
    const lines = ['Video URL,Video Title,Comment Author,Comment Text,Scraped At'];

    for (const video of exportData) {
        for (const comment of video.comments) {
            const cells = [video.url, video.title, comment.author, comment.text, video.scrapedAt];
            lines.push(cells.map(quote).join(','));
        }
    }

    // Header and every row are newline-terminated.
    const csv = `${lines.join('\n')}\n`;
    const blob = new Blob([csv], { type: 'text/csv;charset=utf-8;' });
    downloadBlob(blob, `facebook_comments_${Date.now()}.csv`);
    log('✅ CSV exported');
}
425
/**
 * Download everything in `exportData` as a pretty-printed JSON file.
 */
function exportToJSON() {
    const payload = JSON.stringify(exportData, null, 2);
    const blob = new Blob([payload], { type: 'application/json' });
    downloadBlob(blob, `facebook_comments_${Date.now()}.json`);
    log('✅ JSON exported');
}
432
/**
 * Download all collected comments as an .xlsx workbook with a single
 * "Comments" sheet, one row per comment.
 *
 * Relies on the global `XLSX` object provided by the script's @require.
 */
function exportToExcel() {
    const rows = exportData.flatMap((video) => video.comments.map((comment) => ({
        'Video URL': video.url,
        'Video Title': video.title,
        'Comment Author': comment.author,
        'Comment Text': comment.text,
        'Scraped At': video.scrapedAt,
    })));

    const wb = XLSX.utils.book_new();
    const ws = XLSX.utils.json_to_sheet(rows);
    // Column widths, in the same order as the row keys above.
    ws['!cols'] = [50, 40, 20, 60, 20].map((wch) => ({ wch }));
    XLSX.utils.book_append_sheet(wb, ws, 'Comments');
    XLSX.writeFile(wb, `facebook_comments_${Date.now()}.xlsx`);
    log('✅ Excel exported');
}
460
/**
 * Trigger a browser download of a Blob under the given file name.
 *
 * A hidden temporary link is clicked programmatically. The object URL is
 * released on a timer rather than immediately, because revoking it in the
 * same tick as click() can cancel the download before the browser has
 * started reading the blob.
 *
 * @param {Blob} blob - Data to download.
 * @param {string} filename - Suggested file name.
 */
function downloadBlob(blob, filename) {
    const url = URL.createObjectURL(blob);
    const link = document.createElement('a');
    link.href = url;
    link.download = filename;
    link.style.visibility = 'hidden';
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);
    setTimeout(() => URL.revokeObjectURL(url), 1000);
}
472
// ==================== UI PANEL ====================
/**
 * Build and show the exporter panel, replacing any panel already open.
 *
 * The panel is appended to document.body and stored in `currentPanel`. Its
 * buttons are looked up inside the new panel element (not the whole
 * document) and wired to the corresponding handlers, then the data counter
 * is refreshed.
 */
function createPanel() {
    if (currentPanel) currentPanel.remove();

    const panel = document.createElement('div');
    panel.id = 'fb-comments-exporter-panel';
    panel.innerHTML = `
        <div style="
            position: fixed;
            top: 50%;
            left: 50%;
            transform: translate(-50%, -50%);
            background: #ffffff;
            border: 2px solid #1877f2;
            border-radius: 12px;
            padding: 24px;
            width: 600px;
            max-width: 90vw;
            max-height: 85vh;
            overflow-y: auto;
            z-index: 999999;
            box-shadow: 0 8px 32px rgba(0, 0, 0, 0.3);
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
        ">
            <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 20px;">
                <h2 style="margin: 0; color: #1877f2; font-size: 20px; font-weight: 600;">
                    ๐ฌ Comments Exporter Pro v2
                </h2>
                <button id="fb-close-panel" style="
                    background: none;
                    border: none;
                    font-size: 24px;
                    cursor: pointer;
                    color: #65676b;
                    padding: 0;
                    width: 32px;
                    height: 32px;
                    border-radius: 50%;
                    transition: background 0.2s;
                " title="Close (ESC)">ร</button>
            </div>

            <button id="fb-process-current" style="
                width: 100%;
                padding: 12px;
                background: #1877f2;
                color: white;
                border: none;
                border-radius: 8px;
                font-size: 15px;
                font-weight: 600;
                cursor: pointer;
                margin-bottom: 15px;
                transition: background 0.2s;
            ">
                ๐ Extract Comments from This Page
            </button>

            <div id="fb-status" style="
                margin-bottom: 20px;
                padding: 12px;
                background: #f0f2f5;
                border-radius: 8px;
                min-height: 80px;
                max-height: 180px;
                overflow-y: auto;
                font-size: 12px;
                color: #050505;
                line-height: 1.6;
                font-family: monospace;
            ">
                <div style="color: #65676b;">Ready. Press ESC to close.</div>
            </div>

            <div style="margin-bottom: 20px; padding: 12px; background: #f0fdf4; border-radius: 8px;">
                <span id="fb-data-count" style="color: #166534; font-weight: 600;">0 videos โข 0 comments</span>
            </div>

            <div style="display: flex; gap: 10px;">
                <button id="fb-export-csv" style="flex: 1; padding: 10px; background: #42b72a; color: white; border: none; border-radius: 8px; font-size: 13px; font-weight: 600; cursor: pointer;">๐ CSV</button>
                <button id="fb-export-excel" style="flex: 1; padding: 10px; background: #42b72a; color: white; border: none; border-radius: 8px; font-size: 13px; font-weight: 600; cursor: pointer;">๐ Excel</button>
                <button id="fb-export-json" style="flex: 1; padding: 10px; background: #42b72a; color: white; border: none; border-radius: 8px; font-size: 13px; font-weight: 600; cursor: pointer;">๐ JSON</button>
                <button id="fb-clear-data" style="flex: 1; padding: 10px; background: #e4e6eb; color: #050505; border: none; border-radius: 8px; font-size: 13px; font-weight: 600; cursor: pointer;">๐๏ธ Clear</button>
            </div>
        </div>
    `;

    document.body.appendChild(panel);
    currentPanel = panel;

    // Control id -> click handler. Lookups are scoped to the panel so an
    // unrelated page element with the same id cannot be wired by mistake.
    const clickHandlers = {
        'fb-close-panel': closePanel,
        'fb-process-current': handleProcessCurrent,
        'fb-export-csv': handleExportCSV,
        'fb-export-excel': handleExportExcel,
        'fb-export-json': handleExportJSON,
        'fb-clear-data': handleClearData,
    };
    for (const [id, handler] of Object.entries(clickHandlers)) {
        panel.querySelector(`#${id}`).addEventListener('click', handler);
    }

    updateDataCount();
}
573
/**
 * Append a timestamped line to the panel's status log and keep the newest
 * line scrolled into view. Does nothing when the panel is not open.
 *
 * The message is inserted as plain text: it can contain page-derived content
 * (video titles, error messages), which must not be parsed as HTML.
 *
 * @param {string} message - Text to display.
 */
function updateStatus(message) {
    const statusDiv = document.getElementById('fb-status');
    if (!statusDiv) return;

    const entry = document.createElement('div');
    entry.textContent = `[${new Date().toLocaleTimeString()}] ${message}`;
    statusDiv.appendChild(entry);
    statusDiv.scrollTop = statusDiv.scrollHeight;
}
582
/**
 * Refresh the panel's "N videos / M comments" counter from `exportData`.
 * Does nothing when the counter element is not present.
 */
function updateDataCount() {
    const label = document.getElementById('fb-data-count');
    if (!label) return;

    let commentTotal = 0;
    for (const video of exportData) {
        commentTotal += video.commentCount;
    }
    label.textContent = `${exportData.length} videos โข ${commentTotal} comments`;
}
590
/**
 * Remove the exporter panel from the page, if one is open, and forget it.
 */
function closePanel() {
    if (!currentPanel) return;
    currentPanel.remove();
    currentPanel = null;
}
597
/**
 * Click handler for the "extract" button.
 *
 * Ignores the click (with a status message) while a run is in progress.
 * Otherwise disables the button, processes the page and refreshes the
 * counter. The button and the busy flag are restored in a `finally`, so an
 * unexpected error cannot leave the exporter stuck in the "processing" state.
 *
 * @returns {Promise<void>}
 */
async function handleProcessCurrent() {
    if (isProcessing) {
        updateStatus('⚠️ Already processing...');
        return;
    }

    isProcessing = true;
    // The button may be missing if the panel was rebuilt or closed.
    const btn = document.getElementById('fb-process-current');
    if (btn) btn.disabled = true;

    try {
        await processCurrentPage(updateStatus);
        updateDataCount();
    } finally {
        if (btn) btn.disabled = false;
        isProcessing = false;
    }
}
611
/**
 * Click handler for the CSV button: export when there is data, otherwise
 * show a warning in the status log.
 */
function handleExportCSV() {
    if (exportData.length === 0) {
        updateStatus('⚠️ No data to export');
        return;
    }
    exportToCSV();
    updateStatus('✅ CSV downloaded');
}
620
/**
 * Click handler for the JSON button: export when there is data, otherwise
 * show a warning in the status log.
 */
function handleExportJSON() {
    if (exportData.length === 0) {
        updateStatus('⚠️ No data to export');
        return;
    }
    exportToJSON();
    updateStatus('✅ JSON downloaded');
}
629
/**
 * Click handler for the Excel button: export when there is data, otherwise
 * show a warning in the status log.
 */
function handleExportExcel() {
    if (exportData.length === 0) {
        updateStatus('⚠️ No data to export');
        return;
    }
    exportToExcel();
    updateStatus('✅ Excel downloaded');
}
638
/**
 * Click handler for the Clear button. After the user confirms, drops all
 * collected records and the dedupe set, then refreshes the counter and
 * status log. Cancelling the prompt leaves everything untouched.
 */
function handleClearData() {
    if (!confirm('Clear all data?')) return;

    exportData = [];
    seenHashes.clear();
    updateDataCount();
    updateStatus('✅ Data cleared');
}
647
// Keyboard shortcuts: Escape closes an open panel; Ctrl+Shift+E toggles it.
document.addEventListener('keydown', (e) => {
    if (e.key === 'Escape' && currentPanel) closePanel();

    // Compare case-insensitively: with Caps Lock on, Shift+E reports "e",
    // which an exact match on 'E' would miss.
    const isToggle = e.ctrlKey && e.shiftKey && typeof e.key === 'string' && e.key.toLowerCase() === 'e';
    if (isToggle) {
        e.preventDefault();
        if (currentPanel) closePanel();
        else createPanel();
    }
});
657
/**
 * Add the round floating launcher button to the page.
 *
 * The button grows slightly on hover and toggles the exporter panel on
 * click.
 */
function createFloatingButton() {
    const launcher = document.createElement('button');
    launcher.id = 'fb-comments-exporter-btn';
    launcher.innerHTML = '๐ฌ';
    launcher.title = 'Comments Exporter (Ctrl+Shift+E)';
    launcher.style.cssText = `
        position: fixed;
        bottom: 20px;
        right: 20px;
        width: 56px;
        height: 56px;
        border-radius: 50%;
        background: #1877f2;
        color: white;
        border: none;
        font-size: 24px;
        cursor: pointer;
        box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
        z-index: 999998;
        transition: all 0.3s;
        display: flex;
        align-items: center;
        justify-content: center;
    `;

    // Hover feedback: swap scale and shadow together.
    const applyLook = (transform, boxShadow) => () => {
        launcher.style.transform = transform;
        launcher.style.boxShadow = boxShadow;
    };
    launcher.addEventListener('mouseenter', applyLook('scale(1.1)', '0 6px 16px rgba(0, 0, 0, 0.4)'));
    launcher.addEventListener('mouseleave', applyLook('scale(1)', '0 4px 12px rgba(0, 0, 0, 0.3)'));

    launcher.addEventListener('click', () => {
        if (currentPanel) {
            closePanel();
        } else {
            createPanel();
        }
    });

    document.body.appendChild(launcher);
}
697
/**
 * Entry point: add the floating button as soon as document.body exists.
 *
 * When the body is not there yet, a MutationObserver on the document element
 * waits for it, adds the button once, and disconnects.
 */
function init() {
    log('Initializing...');

    if (document.body) {
        createFloatingButton();
        return;
    }

    const bodyWatcher = new MutationObserver(() => {
        if (!document.body) return;
        createFloatingButton();
        bodyWatcher.disconnect();
    });
    bodyWatcher.observe(document.documentElement, { childList: true });
}
713
// Start immediately when the DOM is already parsed; otherwise wait for it.
if (document.readyState !== 'loading') {
    init();
} else {
    document.addEventListener('DOMContentLoaded', init);
}
719})();