Instagram Comments Extractor

Automatically scrolls through comments and extracts usernames and comment text to a JSON file

Size

11.9 KB

Version

1.0.1

Created

Feb 11, 2026

Updated

27 days ago

// ==UserScript==
// @name		Instagram Comments Extractor
// @description		Automatically scrolls through comments and extracts usernames and comment text to a JSON file
// @version		1.0.1
// @match		https://*.instagram.com/*
// @icon		https://static.cdninstagram.com/rsrc.php/y4/r/QaBlI0OZiks.ico
// @grant		GM.getValue
// @grant		GM.setValue
// ==/UserScript==
(function() {
    'use strict';

    console.log('Instagram Comments Extractor loaded');

    // ---- Constants ---------------------------------------------------------

    const BUTTON_ID = 'ig-comments-extractor-btn';
    const BUTTON_GRADIENT = 'linear-gradient(45deg, #f09433 0%, #e6683c 25%, #dc2743 50%, #cc2366 75%, #bc1888 100%)';
    const SUCCESS_COLOR = '#4CAF50';
    const FAILURE_COLOR = '#f44336';

    // How long a success/failure message stays on the button before it resets.
    const STATUS_RESET_MS = 3000;
    // How often we poll for the post markup before adding the button.
    const READY_POLL_MS = 1000;
    // Number of consecutive scrolls without growth that counts as "end of list".
    const MAX_STALLED_SCROLLS = 3;

    // Labels are written with Unicode escapes so the glyphs cannot be damaged
    // if the file is ever saved or served with the wrong character encoding.
    const LABEL_IDLE = '\u{1F4E5} Extract Comments';      // inbox tray
    const LABEL_SCROLLING = '\u23F3 Scrolling...';         // hourglass
    const LABEL_EXTRACTING = '\u{1F4CA} Extracting...';    // bar chart
    const LABEL_NONE_FOUND = '\u274C No comments found';   // cross mark
    const LABEL_ERROR = '\u274C Error occurred';           // cross mark
    const LABEL_DONE_PREFIX = '\u2705 Downloaded';         // check mark

    // Patterns used by the plain-text fallback in extractComments().
    const LEADING_HANDLE = /^[a-zA-Z0-9._]+/;
    const RELATIVE_AGE = /\d+[wdhms]/g;
    const MIDDLE_DOT = /\u00B7/g;

    // Id of the interval that waits for the post markup, or null when idle.
    let pendingPollId = null;

    // ---- Small utilities ---------------------------------------------------

    /**
     * Resolve after `ms` milliseconds.
     * @param {number} ms
     * @returns {Promise<void>}
     */
    function sleep(ms) {
        return new Promise((resolve) => setTimeout(resolve, ms));
    }

    /**
     * Debounce function to prevent excessive calls: the returned wrapper delays
     * `func` until `wait` ms have passed without another call, then invokes it
     * with the most recent arguments and `this`.
     *
     * Nothing in this script calls it at the moment; it is kept as a utility.
     *
     * @param {Function} func - Function to debounce.
     * @param {number} wait - Quiet period in milliseconds.
     * @returns {Function} Debounced wrapper.
     */
    function debounce(func, wait) {
        let timerId;
        return function executedFunction(...args) {
            clearTimeout(timerId);
            timerId = setTimeout(() => func.apply(this, args), wait);
        };
    }

    // ---- Comment extraction ------------------------------------------------

    /**
     * Find the author's handle inside one comment element.
     * Tries a heading link, then a username span, then the leading
     * handle-like token of the element's text.
     *
     * @param {Element} commentEl
     * @returns {string|null} The handle, or null when none can be found.
     */
    function findUsername(commentEl) {
        // NOTE(review): these class names come from Instagram's generated
        // markup and may change between site builds - verify if nothing matches.
        const selectors = [
            'h2 a, h3 a',
            'span._ap3a._aaco._aacw._aacx._aad7._aade'
        ];

        for (const selector of selectors) {
            const node = commentEl.querySelector(selector);
            const text = node ? node.textContent.trim() : '';
            if (text) {
                return text;
            }
        }

        const match = commentEl.textContent.trim().match(LEADING_HANDLE);
        return match ? match[0] : null;
    }

    /**
     * Find the comment body inside one comment element.
     * Prefers the first `span[dir="auto"]`; otherwise strips the handle,
     * badge, relative timestamps and action labels from the element's text.
     *
     * @param {Element} commentEl
     * @returns {string} The comment text (may be empty).
     */
    function findCommentText(commentEl) {
        const bodySpan = commentEl.querySelector('span[dir="auto"]');
        const spanText = bodySpan ? bodySpan.textContent.trim() : '';
        if (spanText) {
            return spanText;
        }

        // Fallback: remove username, verified badge, timestamps, and action text.
        return commentEl.textContent.trim()
            .replace(LEADING_HANDLE, '')
            .replace(/Verified/g, '')
            .replace(RELATIVE_AGE, '')
            .replace(/See translation/g, '')
            .replace(/Edited/g, '')
            .replace(MIDDLE_DOT, '')
            .trim();
    }

    /**
     * Extract every comment currently rendered on the page.
     * Elements for which either the username or the text cannot be determined
     * are skipped; a failure on one element is logged and does not stop the rest.
     *
     * @returns {Array<{username: string, comment: string}>}
     */
    function extractComments() {
        const comments = [];
        const commentElements = document.querySelectorAll('li._a9zj, ul._a9z6 > li');

        console.log(`Found ${commentElements.length} comment elements`);

        commentElements.forEach((commentEl, index) => {
            try {
                const username = findUsername(commentEl);
                const comment = findCommentText(commentEl);

                if (username && comment) {
                    comments.push({ username, comment });
                }
            } catch (error) {
                console.error(`Error extracting comment ${index}:`, error);
            }
        });

        return comments;
    }

    // ---- Scrolling ---------------------------------------------------------

    /**
     * Repeatedly scroll the comments container to its bottom so that more
     * comments load. Stops after `maxScrolls` attempts, or once the container
     * height has stayed the same for three consecutive attempts.
     *
     * @param {number} [maxScrolls=50] - Upper bound on scroll attempts.
     * @param {number} [delayMs=2000] - Pause after each scroll to let content load.
     * @returns {Promise<boolean>} false when the container is not found, true otherwise.
     */
    async function scrollComments(maxScrolls = 50, delayMs = 2000) {
        console.log('Starting to scroll comments...');

        // NOTE(review): generated class names - confirm against current markup.
        const commentsContainer = document.querySelector('div.x78zum5.xdt5ytf.x1q2y9iw.x1n2onr6.xh8yej3');

        if (!commentsContainer) {
            console.error('Comments container not found');
            return false;
        }

        let scrollCount = 0;
        let previousHeight = 0;
        let stalledCount = 0;

        while (scrollCount < maxScrolls) {
            commentsContainer.scrollTop = commentsContainer.scrollHeight;

            await sleep(delayMs);

            const currentHeight = commentsContainer.scrollHeight;

            if (currentHeight === previousHeight) {
                stalledCount++;
                console.log(`No new comments loaded (${stalledCount}/3)`);

                if (stalledCount >= MAX_STALLED_SCROLLS) {
                    console.log('Reached end of comments');
                    break;
                }
            } else {
                stalledCount = 0;
                console.log(`Scrolled ${scrollCount + 1} times, loaded more comments`);
            }

            previousHeight = currentHeight;
            scrollCount++;
        }

        console.log(`Finished scrolling after ${scrollCount} attempts`);
        return true;
    }

    // ---- Download ----------------------------------------------------------

    /**
     * Serialize `data` as pretty-printed JSON and trigger a browser download.
     *
     * @param {*} data - Any JSON-serializable value.
     * @param {string} filename - Suggested name for the downloaded file.
     */
    function downloadJSON(data, filename) {
        const jsonStr = JSON.stringify(data, null, 2);
        const blob = new Blob([jsonStr], { type: 'application/json' });
        const url = URL.createObjectURL(blob);

        const link = document.createElement('a');
        link.href = url;
        link.download = filename;
        document.body.appendChild(link);

        try {
            link.click();
        } finally {
            // Always detach the temporary anchor and release the blob URL,
            // even if click() throws.
            document.body.removeChild(link);
            URL.revokeObjectURL(url);
        }

        console.log(`Downloaded ${filename}`);
    }

    /**
     * Build the download filename from the post id and the current time.
     * The id is the path segment following `/p/`, so both `/p/<id>/` and
     * `/<user>/p/<id>/` URLs work; falls back to "post" when absent.
     *
     * @returns {string}
     */
    function buildFilename() {
        const idMatch = window.location.pathname.match(/\/p\/([^/]+)/);
        const postId = idMatch ? idMatch[1] : 'post';
        // e.g. 2026-02-11T12-34-56 (milliseconds and the trailing "Z" are cut off)
        const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, -5);
        return `instagram_comments_${postId}_${timestamp}.json`;
    }

    // ---- Button ------------------------------------------------------------

    /**
     * Show a temporary status on the button, then restore its idle look and
     * re-enable it after STATUS_RESET_MS.
     *
     * @param {HTMLButtonElement} button
     * @param {string} label - Text to show in the meantime.
     * @param {string} background - CSS background to show in the meantime.
     */
    function showStatusThenReset(button, label, background) {
        button.textContent = label;
        button.style.background = background;

        setTimeout(() => {
            button.textContent = LABEL_IDLE;
            button.style.background = BUTTON_GRADIENT;
            button.disabled = false;
        }, STATUS_RESET_MS);
    }

    /**
     * Click handler: scroll, extract, download, and report the outcome on the
     * button. Never rejects - every failure is logged and shown on the button.
     *
     * @param {HTMLButtonElement} button
     * @returns {Promise<void>}
     */
    async function runExtraction(button) {
        button.disabled = true;
        button.textContent = LABEL_SCROLLING;

        try {
            // The result is deliberately ignored: even when the scroll container
            // is not found, comments that are already rendered can be extracted.
            await scrollComments(50);

            button.textContent = LABEL_EXTRACTING;

            // Wait a bit for final render
            await sleep(1000);

            const comments = extractComments();
            console.log(`Extracted ${comments.length} comments`);

            if (comments.length === 0) {
                showStatusThenReset(button, LABEL_NONE_FOUND, FAILURE_COLOR);
                return;
            }

            downloadJSON(comments, buildFilename());
            showStatusThenReset(button, `${LABEL_DONE_PREFIX} ${comments.length} comments`, SUCCESS_COLOR);
        } catch (error) {
            console.error('Error during extraction:', error);
            showStatusThenReset(button, LABEL_ERROR, FAILURE_COLOR);
        }
    }

    /**
     * Create the floating "Extract Comments" button (not yet attached).
     * @returns {HTMLButtonElement}
     */
    function buildExtractionButton() {
        const button = document.createElement('button');
        button.id = BUTTON_ID;
        button.textContent = LABEL_IDLE;
        button.style.cssText = [
            'position: fixed',
            'bottom: 20px',
            'right: 20px',
            'z-index: 9999',
            'padding: 12px 24px',
            `background: ${BUTTON_GRADIENT}`,
            'color: white',
            'border: none',
            'border-radius: 8px',
            'font-size: 14px',
            'font-weight: 600',
            'cursor: pointer',
            'box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3)',
            'transition: transform 0.2s, box-shadow 0.2s'
        ].join('; ') + ';';

        button.addEventListener('mouseenter', () => {
            button.style.transform = 'translateY(-2px)';
            button.style.boxShadow = '0 6px 16px rgba(0, 0, 0, 0.4)';
        });

        button.addEventListener('mouseleave', () => {
            button.style.transform = 'translateY(0)';
            button.style.boxShadow = '0 4px 12px rgba(0, 0, 0, 0.3)';
        });

        button.addEventListener('click', () => runExtraction(button));

        return button;
    }

    /**
     * Add the extraction button once the post markup is present.
     * Does nothing if the button already exists or a wait is already running,
     * so repeated calls cannot produce duplicate buttons or stacked timers.
     */
    function createExtractionButton() {
        if (document.getElementById(BUTTON_ID) || pendingPollId !== null) {
            return;
        }

        // Wait for the page to be ready
        pendingPollId = setInterval(() => {
            if (!document.querySelector('section._aamu')) {
                return;
            }

            clearInterval(pendingPollId);
            pendingPollId = null;

            if (document.getElementById(BUTTON_ID)) {
                return;
            }

            document.body.appendChild(buildExtractionButton());
            console.log('Extraction button added to page');
        }, READY_POLL_MS);
    }

    /**
     * Cancel any pending wait and remove the button if it is on the page.
     */
    function removeExtractionButton() {
        if (pendingPollId !== null) {
            clearInterval(pendingPollId);
            pendingPollId = null;
        }

        const button = document.getElementById(BUTTON_ID);
        if (button) {
            button.remove();
        }
    }

    // ---- Bootstrap ---------------------------------------------------------

    /**
     * Show the button on post pages (path contains "/p/") and take it away
     * everywhere else.
     */
    function init() {
        if (window.location.pathname.includes('/p/')) {
            console.log('On Instagram post page, initializing...');
            createExtractionButton();
        } else {
            removeExtractionButton();
        }
    }

    // Run on page load
    if (document.readyState === 'loading') {
        document.addEventListener('DOMContentLoaded', init);
    } else {
        init();
    }

    // Handle navigation changes (Instagram is a SPA): re-run init() whenever
    // the URL differs from the one seen on the previous DOM mutation.
    let lastUrl = location.href;
    new MutationObserver(() => {
        const url = location.href;
        if (url !== lastUrl) {
            lastUrl = url;
            console.log('URL changed, reinitializing...');
            init();
        }
    }).observe(document, { subtree: true, childList: true });

})();
300})();