Automatically scrolls through comments and extracts usernames and comment text to a JSON file
Size
11.9 KB
Version
1.0.1
Created
Feb 11, 2026
Updated
27 days ago
// ==UserScript==
// @name Instagram Comments Extractor
// @description Automatically scrolls through comments and extracts usernames and comment text to a JSON file
// @version 1.0.1
// @match https://*.instagram.com/*
// @icon https://static.cdninstagram.com/rsrc.php/y4/r/QaBlI0OZiks.ico
// @grant GM.getValue
// @grant GM.setValue
// ==/UserScript==
10(function() {
11 'use strict';
12
13 console.log('Instagram Comments Extractor loaded');
14
/**
 * Debounce helper to prevent excessive calls.
 *
 * Returns a wrapper that postpones invoking `func` until `wait` milliseconds
 * have passed since the wrapper was last called. Only the most recent call's
 * arguments are forwarded, and `func` runs with the same `this` the wrapper
 * was called with, so the wrapper can be used as a method.
 *
 * @param {Function} func - Function to invoke once calls have settled.
 * @param {number} wait - Quiet period in milliseconds.
 * @returns {Function} The debounced wrapper (it returns nothing itself).
 */
function debounce(func, wait) {
    let timeout;

    return function executedFunction(...args) {
        // Restart the quiet period on every call.
        clearTimeout(timeout);
        timeout = setTimeout(() => {
            timeout = undefined;
            // Arrow function keeps the wrapper's `this`, so methods still work.
            func.apply(this, args);
        }, wait);
    };
}

/**
 * Resolves the author's username for one comment element.
 *
 * Tries, in order: a link inside an h2/h3 heading, a span carrying the
 * username class list, and finally the leading run of username-like
 * characters in the element's full text.
 *
 * @param {Element} commentEl - Element wrapping a single comment.
 * @returns {string|null} The username, or null when nothing matched.
 */
function findCommentUsername(commentEl) {
    const selectors = ['h2 a, h3 a', 'span._ap3a._aaco._aacw._aacx._aad7._aade'];
    for (const selector of selectors) {
        const node = commentEl.querySelector(selector);
        const text = node ? node.textContent.trim() : '';
        if (text) {
            return text;
        }
    }

    const match = commentEl.textContent.trim().match(/^([a-zA-Z0-9._]+)/);
    return match ? match[1] : null;
}

/**
 * Resolves the comment body for one comment element.
 *
 * Prefers the first `span[dir="auto"]`; when that is missing or empty, falls
 * back to the element's full text with the leading username, the "Verified"
 * badge text, relative timestamps, UI action labels and the middle-dot
 * separator stripped out.
 *
 * NOTE(review): the first `span[dir="auto"]` is assumed to hold the comment
 * body rather than the username - confirm against the live markup.
 *
 * @param {Element} commentEl - Element wrapping a single comment.
 * @returns {string} The comment text; may be empty when nothing is left.
 */
function findCommentText(commentEl) {
    const commentSpan = commentEl.querySelector('span[dir="auto"]');
    const spanText = commentSpan ? commentSpan.textContent.trim() : '';
    if (spanText) {
        return spanText;
    }

    return commentEl.textContent
        .trim()
        .replace(/^[a-zA-Z0-9._]+/, '')
        .replace(/Verified/g, '')
        .replace(/\d+[wdhms]/g, '')
        .replace(/See translation/g, '')
        .replace(/Edited/g, '')
        // U+00B7 MIDDLE DOT, written as an escape so a wrong file encoding
        // cannot corrupt the pattern again.
        .replace(/\u00B7/g, '')
        .trim();
}

/**
 * Extracts comments from the page.
 *
 * Collects every element matching the comment-item selectors and keeps the
 * ones for which both a username and a non-empty comment text were found.
 * A failure on one element is logged and does not abort the others.
 *
 * @returns {Array<{username: string, comment: string}>} Extracted comments in
 *     the order their elements were returned by the query.
 */
function extractComments() {
    const comments = [];
    const commentElements = document.querySelectorAll('li._a9zj, ul._a9z6 > li');

    console.log(`Found ${commentElements.length} comment elements`);

    commentElements.forEach((commentEl, index) => {
        try {
            const username = findCommentUsername(commentEl);
            const comment = findCommentText(commentEl);

            if (username && comment) {
                comments.push({ username, comment });
            }
        } catch (error) {
            console.error(`Error extracting comment ${index}:`, error);
        }
    });

    return comments;
}

/**
 * Scrolls the comments section so that further comments get loaded.
 *
 * Repeatedly jumps the comments container to its bottom, waits `delayMs`,
 * and compares the container's scroll height with the previous round. Stops
 * after `maxScrolls` rounds, or earlier once the height has stayed the same
 * for `stableLimit` consecutive rounds.
 *
 * @param {number} [maxScrolls=50] - Upper bound on scroll rounds.
 * @param {number} [delayMs=2000] - Wait after each scroll, in milliseconds.
 * @param {number} [stableLimit=3] - Consecutive unchanged rounds that count
 *     as having reached the end.
 * @returns {Promise<boolean>} false when the comments container was not
 *     found, true otherwise.
 */
async function scrollComments(maxScrolls = 50, delayMs = 2000, stableLimit = 3) {
    console.log('Starting to scroll comments...');

    // Find the scrollable comments container
    const commentsContainer = document.querySelector('div.x78zum5.xdt5ytf.x1q2y9iw.x1n2onr6.xh8yej3');

    if (!commentsContainer) {
        console.error('Comments container not found');
        return false;
    }

    let scrollCount = 0;
    let previousHeight = 0;
    let noChangeCount = 0;

    while (scrollCount < maxScrolls) {
        // Scroll to bottom of comments container
        commentsContainer.scrollTop = commentsContainer.scrollHeight;
        // Count the attempt right away so the final tally is also correct
        // when the loop is left through `break`.
        scrollCount++;

        // Wait for new comments to load
        await new Promise((resolve) => setTimeout(resolve, delayMs));

        const currentHeight = commentsContainer.scrollHeight;

        if (currentHeight === previousHeight) {
            noChangeCount++;
            console.log(`No new comments loaded (${noChangeCount}/${stableLimit})`);

            if (noChangeCount >= stableLimit) {
                console.log('Reached end of comments');
                break;
            }
        } else {
            noChangeCount = 0;
            console.log(`Scrolled ${scrollCount} times, loaded more comments`);
        }

        previousHeight = currentHeight;
    }

    console.log(`Finished scrolling after ${scrollCount} attempts`);
    return true;
}

/**
 * Downloads `data` as a pretty-printed JSON file.
 *
 * Serializes `data` with two-space indentation into a Blob, points a
 * temporary `<a download>` element at an object URL for it, and clicks that
 * element. The anchor and the object URL are released even when the click
 * throws, so a failed download does not leave either behind.
 *
 * @param {*} data - Value to serialize with JSON.stringify.
 * @param {string} filename - File name assigned to the anchor's `download`.
 * @returns {void}
 */
function downloadJSON(data, filename) {
    const jsonStr = JSON.stringify(data, null, 2);
    const blob = new Blob([jsonStr], { type: 'application/json' });
    const url = URL.createObjectURL(blob);

    const a = document.createElement('a');
    a.href = url;
    a.download = filename;

    try {
        document.body.appendChild(a);
        a.click();
    } finally {
        // remove() is a no-op if the anchor never got attached.
        a.remove();
        URL.revokeObjectURL(url);
    }

    console.log(`Downloaded ${filename}`);
}

// --- Extraction button -------------------------------------------------------

// DOM id of the floating button; also used to detect an existing button.
const EXTRACTION_BUTTON_ID = 'ig-comments-extractor-btn';

// Button labels. The glyphs are written as Unicode escapes so a wrong file
// encoding cannot corrupt them.
// NOTE(review): the glyphs were restored from mis-decoded text. The one for
// "Extracting..." could not be recovered and is a placeholder; the two
// failure glyphs are a best guess - confirm all of them.
const EXTRACTION_IDLE_LABEL = '\u{1F4E5} Extract Comments'; // inbox tray
const EXTRACTION_SCROLLING_LABEL = '\u23F3 Scrolling...'; // hourglass
const EXTRACTION_WORKING_LABEL = '\u{1F50D} Extracting...'; // magnifying glass (placeholder)
const EXTRACTION_SUCCESS_GLYPH = '\u2705'; // check mark button
const EXTRACTION_FAILURE_GLYPH = '\u274C'; // cross mark

const EXTRACTION_IDLE_BACKGROUND =
    'linear-gradient(45deg, #f09433 0%, #e6683c 25%, #dc2743 50%, #cc2366 75%, #bc1888 100%)';
const EXTRACTION_SUCCESS_BACKGROUND = '#4CAF50';
const EXTRACTION_FAILURE_BACKGROUND = '#f44336';

// Polling for the post section: one probe per second, given up after this
// many probes so the timer cannot run forever.
const EXTRACTION_POLL_INTERVAL_MS = 1000;
const EXTRACTION_MAX_POLL_ATTEMPTS = 60;

// How long a result message stays on the button before it resets.
const EXTRACTION_RESULT_DISPLAY_MS = 3000;

/**
 * Returns the post id taken from the current path, or 'post' when the path
 * has no `/p/<id>` segment. Also handles paths with a leading segment before
 * `/p/`.
 */
function currentPostId() {
    const match = window.location.pathname.match(/\/p\/([^/]+)/);
    return match ? match[1] : 'post';
}

/** Shows a result on the button, then restores its idle look and re-enables it. */
function flashExtractionResult(button, label, background) {
    button.textContent = label;
    button.style.background = background;

    setTimeout(() => {
        button.textContent = EXTRACTION_IDLE_LABEL;
        button.style.background = EXTRACTION_IDLE_BACKGROUND;
        button.disabled = false;
    }, EXTRACTION_RESULT_DISPLAY_MS);
}

/** Click handler: scroll, extract, download, and report the outcome on the button. */
async function runExtraction(button) {
    button.disabled = true;
    button.textContent = EXTRACTION_SCROLLING_LABEL;

    try {
        // Scroll through comments
        await scrollComments(50);

        button.textContent = EXTRACTION_WORKING_LABEL;

        // Wait a bit for final render
        await new Promise((resolve) => setTimeout(resolve, 1000));

        const comments = extractComments();
        console.log(`Extracted ${comments.length} comments`);

        if (comments.length === 0) {
            flashExtractionResult(
                button,
                `${EXTRACTION_FAILURE_GLYPH} No comments found`,
                EXTRACTION_FAILURE_BACKGROUND,
            );
            return;
        }

        // File name carries the post id and a second-resolution UTC timestamp,
        // e.g. 2026-02-11T12-34-56 (":" and "." replaced, milliseconds cut).
        const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, -5);
        const filename = `instagram_comments_${currentPostId()}_${timestamp}.json`;

        downloadJSON(comments, filename);

        flashExtractionResult(
            button,
            `${EXTRACTION_SUCCESS_GLYPH} Downloaded ${comments.length} comments`,
            EXTRACTION_SUCCESS_BACKGROUND,
        );
    } catch (error) {
        console.error('Error during extraction:', error);
        flashExtractionResult(
            button,
            `${EXTRACTION_FAILURE_GLYPH} Error occurred`,
            EXTRACTION_FAILURE_BACKGROUND,
        );
    }
}

/** Builds the styled, fully wired (hover + click) extraction button element. */
function buildExtractionButton() {
    const button = document.createElement('button');
    button.id = EXTRACTION_BUTTON_ID;
    button.textContent = EXTRACTION_IDLE_LABEL;
    button.style.cssText = `
        position: fixed;
        bottom: 20px;
        right: 20px;
        z-index: 9999;
        padding: 12px 24px;
        background: ${EXTRACTION_IDLE_BACKGROUND};
        color: white;
        border: none;
        border-radius: 8px;
        font-size: 14px;
        font-weight: 600;
        cursor: pointer;
        box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
        transition: transform 0.2s, box-shadow 0.2s;
    `;

    button.addEventListener('mouseenter', () => {
        button.style.transform = 'translateY(-2px)';
        button.style.boxShadow = '0 6px 16px rgba(0, 0, 0, 0.4)';
    });

    button.addEventListener('mouseleave', () => {
        button.style.transform = 'translateY(0)';
        button.style.boxShadow = '0 4px 12px rgba(0, 0, 0, 0.3)';
    });

    button.addEventListener('click', () => runExtraction(button));

    return button;
}

/**
 * Creates and adds the extraction button.
 *
 * Does nothing when the button already exists. Otherwise polls once per
 * second for `section._aamu` and, once it is present, appends the button to
 * `document.body`. Each probe re-checks for an existing button so that
 * several concurrent pollers cannot add duplicates, and polling is abandoned
 * after EXTRACTION_MAX_POLL_ATTEMPTS probes.
 *
 * @returns {void}
 */
function createExtractionButton() {
    if (document.getElementById(EXTRACTION_BUTTON_ID)) {
        return;
    }

    let attempts = 0;
    const checkInterval = setInterval(() => {
        attempts += 1;

        // Another poller may have added the button in the meantime.
        if (document.getElementById(EXTRACTION_BUTTON_ID)) {
            clearInterval(checkInterval);
            return;
        }

        if (!document.querySelector('section._aamu')) {
            if (attempts >= EXTRACTION_MAX_POLL_ATTEMPTS) {
                clearInterval(checkInterval);
                console.warn('Post section not found, extraction button not added');
            }
            return;
        }

        clearInterval(checkInterval);
        document.body.appendChild(buildExtractionButton());
        console.log('Extraction button added to page');
    }, EXTRACTION_POLL_INTERVAL_MS);
}

/**
 * Initializes the extension for the current URL.
 *
 * On a post page (path contains `/p/`) it requests the extraction button.
 * On any other page it removes a button left over from a previously visited
 * post, since the button is appended to `document.body` and would otherwise
 * stay visible after in-app navigation.
 *
 * @returns {void}
 */
function init() {
    // Check if we're on a post page
    if (window.location.pathname.includes('/p/')) {
        console.log('On Instagram post page, initializing...');
        createExtractionButton();
        return;
    }

    const staleButton = document.getElementById('ig-comments-extractor-btn');
    if (staleButton) {
        staleButton.remove();
    }
}

// Run on page load: wait for DOMContentLoaded while the document is still
// being parsed, otherwise initialize immediately.
if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', init);
} else {
    init();
}

// Handle navigation changes (Instagram is a SPA): any DOM mutation triggers a
// comparison of the current URL with the last one seen, and init() is re-run
// when it differs.
let lastUrl = location.href;
const navigationObserver = new MutationObserver(() => {
    const url = location.href;
    if (url === lastUrl) {
        return;
    }
    lastUrl = url;
    console.log('URL changed, reinitializing...');
    init();
});
navigationObserver.observe(document, { subtree: true, childList: true });

300})();