Size
12.0 KB
Version
1.0.1
Created
Nov 21, 2025
Updated
22 days ago
1// ==UserScript==
2// @name 小红书批量数据采集器
3// @description 自动滚动加载内容并批量采集数据,支持导出JSON格式
4// @version 1.0.1
5// @match https://*.xiaohongshu.com/*
6// @icon https://fe-video-qc.xhscdn.com/fe-platform/ed8fe781ce9e16c1bfac2cd962f0721edabe2e49.ico
7// ==/UserScript==
8(function() {
9 'use strict';
10
11 // Shared collector state for the whole userscript.
// collectedData: note records appended by scanAndCollect(), serialized by exportJSON(), reset by clearData().
12 let collectedData = [];
// isCollecting: set to true by startCollecting() and back to false by stopCollecting().
13 let isCollecting = false;
// autoScrollInterval: id returned by setInterval() in startCollecting(); null while no timer is running.
14 let autoScrollInterval = null;
15
16 // 防抖函数
/**
 * Wraps `func` so that it only runs once calls have stopped for `wait` milliseconds.
 *
 * Every call to the returned function cancels the pending invocation (if any) and
 * schedules a new one, so only the arguments and `this` value of the most recent
 * call are forwarded to `func`.
 *
 * @param {Function} func - Function to invoke after the quiet period.
 * @param {number} wait - Quiet period in milliseconds.
 * @returns {Function} Debounced wrapper around `func`; it returns nothing.
 */
function debounce(func, wait) {
    let timeout;
    return function executedFunction(...args) {
        clearTimeout(timeout);
        // The arrow function keeps the wrapper's `this`, so debounced methods still
        // see their receiver when they finally run.
        timeout = setTimeout(() => {
            timeout = undefined;
            func.apply(this, args);
        }, wait);
    };
}
28
29 // 采集单个笔记数据
/**
 * Extracts one note record from a feed element.
 *
 * The note id comes from the `data-note-id` attribute or, failing that, from an
 * `/explore/<id>` link. The element itself is accepted as that link because
 * scanAndCollect() may hand over bare `a[href*="/explore/"]` anchors, which a
 * descendant-only `querySelector` would never find.
 *
 * Notes without a recoverable id receive a generated `unknown_…` id. Such notes are
 * de-duplicated by their visible content (title, author, link, image) so that
 * repeated scans of the same element do not append the same note again.
 *
 * @param {Element} noteElement - Candidate note element from the page.
 * @returns {{id: string, title: string, author: string, link: string, image: string,
 *   likes: string, collectedAt: string} | null} The record, or null when the note was
 *   already collected or extraction failed (the failure is logged).
 */
function collectNoteData(noteElement) {
    const exploreLinkSelector = 'a[href*="/explore/"]';
    const noteIdPattern = /\/explore\/([a-zA-Z0-9]+)/;

    try {
        const linkElement = noteElement.matches?.(exploreLinkSelector)
            ? noteElement
            : noteElement.querySelector(exploreLinkSelector);

        // Prefer the explore link; keep the first anchor as a fallback source for the id.
        const idFromHref = [linkElement?.href, noteElement.querySelector('a')?.href]
            .map((href) => href?.match(noteIdPattern)?.[1])
            .find(Boolean);
        const stableId = noteElement.getAttribute('data-note-id') || idFromHref || null;

        // Skip notes that were already collected under the same stable id.
        if (stableId && collectedData.some((item) => item.id === stableId)) {
            return null;
        }

        const titleElement = noteElement.querySelector('.title, .note-title, [class*="title"]');
        const authorElement = noteElement.querySelector('.author, .name, [class*="author"], [class*="name"]');
        const imageElement = noteElement.querySelector('img');
        const likeElement = noteElement.querySelector('[class*="like"], [class*="count"]');

        const title = titleElement?.textContent?.trim() || '无标题';
        const author = authorElement?.textContent?.trim() || '未知作者';
        const link = linkElement?.href || '';
        const image = imageElement?.src || '';

        if (!stableId) {
            // A generated id differs on every scan, so compare content instead.
            const alreadyCollected = collectedData.some((item) =>
                String(item.id).startsWith('unknown_') &&
                item.title === title &&
                item.author === author &&
                item.link === link &&
                item.image === image);
            if (alreadyCollected) {
                return null;
            }
        }

        const data = {
            // The length suffix keeps generated ids distinct within the same millisecond.
            id: stableId || `unknown_${Date.now()}_${collectedData.length}`,
            title,
            author,
            link,
            image,
            likes: likeElement?.textContent?.trim() || '0',
            collectedAt: new Date().toISOString()
        };

        console.log('采集到笔记数据:', data);
        return data;
    } catch (error) {
        console.error('采集笔记数据失败:', error);
        return null;
    }
}
64
65 // 扫描页面并采集数据
/**
 * Finds note elements on the page and appends every newly seen note to collectedData.
 *
 * Candidate selectors are tried from most to least specific; the first one that
 * matches anything supplies the whole batch and the remaining selectors are not
 * queried. The UI is refreshed only when at least one new record was added.
 */
function scanAndCollect() {
    const candidateSelectors = [
        'section.note-item',
        '.note-item',
        '[class*="note-item"]',
        '[class*="feed-item"]',
        'a[href*="/explore/"]'
    ];

    let matchedNotes = [];
    // `some` stops at the first selector that yields elements.
    candidateSelectors.some((candidate) => {
        const found = document.querySelectorAll(candidate);
        if (!(found.length > 0)) {
            return false;
        }
        matchedNotes = Array.from(found);
        console.log(`找到 ${matchedNotes.length} 个笔记元素,使用选择器: ${candidate}`);
        return true;
    });

    let added = 0;
    for (const noteNode of matchedNotes) {
        const record = collectNoteData(noteNode);
        if (!record) {
            continue;
        }
        collectedData.push(record);
        added += 1;
    }

    if (added > 0) {
        updateUI();
        console.log(`新采集 ${added} 条数据,总计 ${collectedData.length} 条`);
    }
}
99
100 // 自动滚动
/**
 * Performs one auto-scroll step.
 *
 * While the bottom of the viewport is more than 100px above the end of the document,
 * the page is scrolled down by 800px with smooth behavior. Otherwise the function
 * waits two seconds and logs a message if the document height has not changed.
 */
function autoScroll() {
    const heightBefore = document.documentElement.scrollHeight;
    const viewportBottom = window.pageYOffset + window.innerHeight;

    if (viewportBottom < heightBefore - 100) {
        window.scrollBy({ top: 800, behavior: 'smooth' });
        console.log('自动滚动中...');
        return;
    }

    console.log('已滚动到底部,等待加载更多内容...');
    // Give lazily loaded content a moment to extend the page before checking.
    setTimeout(function checkForMoreContent() {
        const heightAfter = document.documentElement.scrollHeight;
        if (heightAfter !== heightBefore) {
            return;
        }
        console.log('没有更多内容了');
    }, 2000);
}
122
123 // 开始采集
/**
 * Starts a collection session; does nothing if one is already running.
 *
 * Scans the page once immediately, then scrolls and rescans every three seconds,
 * and finally refreshes the control panel.
 */
function startCollecting() {
    if (!isCollecting) {
        isCollecting = true;
        console.log('开始批量采集数据...');

        // Pick up whatever is already rendered before the first scroll.
        scanAndCollect();

        const scrollThenScan = function () {
            autoScroll();
            scanAndCollect();
        };
        autoScrollInterval = setInterval(scrollThenScan, 3000);

        updateUI();
    }
}
141
142 // 停止采集
/**
 * Ends the collection session: clears the running flag, cancels the scroll/scan
 * timer if one is active, and refreshes the control panel.
 */
function stopCollecting() {
    const activeTimer = autoScrollInterval;
    isCollecting = false;

    if (activeTimer) {
        clearInterval(activeTimer);
        autoScrollInterval = null;
    }

    console.log('停止采集');
    updateUI();
}
152
153 // 导出JSON
/**
 * Downloads all collected records as a pretty-printed JSON file named
 * `xiaohongshu_data_<timestamp>.json`.
 *
 * Shows an alert and returns early when nothing has been collected yet. The download
 * is triggered through a temporary anchor element pointing at a Blob object URL.
 */
function exportJSON() {
    if (collectedData.length === 0) {
        alert('没有数据可导出!请先采集数据。');
        return;
    }

    const dataStr = JSON.stringify(collectedData, null, 2);
    const blob = new Blob([dataStr], { type: 'application/json' });
    const url = URL.createObjectURL(blob);

    const a = document.createElement('a');
    a.href = url;
    a.download = `xiaohongshu_data_${Date.now()}.json`;
    document.body.appendChild(a);
    try {
        a.click();
    } finally {
        // Remove the temporary anchor even if the click throws.
        document.body.removeChild(a);
        // Revoke later rather than synchronously: the browser may not have started
        // reading the blob yet, and revoking immediately can abort the download.
        setTimeout(() => URL.revokeObjectURL(url), 1000);
    }

    console.log(`成功导出 ${collectedData.length} 条数据`);
    alert(`成功导出 ${collectedData.length} 条数据!`);
}
175
176 // 清空数据
/**
 * Discards every collected record after the user confirms, then refreshes the
 * control panel. Declining the confirmation leaves the data untouched.
 */
function clearData() {
    const userAgreed = confirm(`确定要清空已采集的 ${collectedData.length} 条数据吗?`);
    if (!userAgreed) {
        return;
    }

    collectedData = [];
    updateUI();
    console.log('数据已清空');
}
184
185 // 更新UI显示
/**
 * Syncs the control panel with the current state: the record counter, the status
 * label and its color, and the toggle button's caption and background.
 * Any element that is not in the document is skipped.
 */
function updateUI() {
    const [counterEl, statusEl, toggleEl] = [
        'xhs-collector-count',
        'xhs-collector-status',
        'xhs-collector-toggle'
    ].map((elementId) => document.getElementById(elementId));

    // Pick the whole running / stopped presentation in one place.
    const view = isCollecting
        ? { status: '采集中...', statusColor: '#00ff00', caption: '⏸ 停止采集', buttonColor: '#ff4757' }
        : { status: '已停止', statusColor: '#999', caption: '▶ 开始采集', buttonColor: '#2ed573' };

    if (counterEl) {
        counterEl.textContent = collectedData.length;
    }

    if (statusEl) {
        statusEl.textContent = view.status;
        statusEl.style.color = view.statusColor;
    }

    if (toggleEl) {
        toggleEl.textContent = view.caption;
        toggleEl.style.background = view.buttonColor;
    }
}
205
206 // 创建控制面板
/**
 * Builds the floating control panel, appends it to the page and wires up its
 * buttons (start/stop, export, clear) and the drag-by-header behavior.
 *
 * The function is idempotent: if a panel with id `xhs-collector-panel` already
 * exists, nothing is created, so a second invocation cannot produce duplicate
 * element ids or duplicate document-level mouse listeners.
 */
function createControlPanel() {
    if (document.getElementById('xhs-collector-panel')) {
        return;
    }

    const panel = document.createElement('div');
    panel.id = 'xhs-collector-panel';
    panel.innerHTML = `
        <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 15px; border-radius: 10px 10px 0 0; cursor: move; user-select: none;">
            <div style="font-size: 16px; font-weight: bold; margin-bottom: 5px;">📊 数据采集器</div>
            <div style="font-size: 12px; opacity: 0.9;">自动滚动 + 批量采集</div>
        </div>
        <div style="padding: 15px; background: white;">
            <div style="display: flex; align-items: center; justify-content: space-between; margin-bottom: 15px; padding: 10px; background: #f8f9fa; border-radius: 8px;">
                <span style="font-size: 14px; color: #333;">已采集:</span>
                <span id="xhs-collector-count" style="font-size: 24px; font-weight: bold; color: #667eea;">0</span>
            </div>
            <div style="display: flex; align-items: center; justify-content: space-between; margin-bottom: 15px; padding: 8px; background: #f8f9fa; border-radius: 8px;">
                <span style="font-size: 13px; color: #666;">状态:</span>
                <span id="xhs-collector-status" style="font-size: 13px; color: #999;">已停止</span>
            </div>
            <button id="xhs-collector-toggle" style="width: 100%; padding: 12px; margin-bottom: 10px; background: #2ed573; color: white; border: none; border-radius: 8px; font-size: 14px; font-weight: bold; cursor: pointer; transition: all 0.3s;">
                ▶ 开始采集
            </button>
            <button id="xhs-collector-export" style="width: 100%; padding: 12px; margin-bottom: 10px; background: #5352ed; color: white; border: none; border-radius: 8px; font-size: 14px; font-weight: bold; cursor: pointer; transition: all 0.3s;">
                💾 导出JSON
            </button>
            <button id="xhs-collector-clear" style="width: 100%; padding: 10px; background: #ff6b6b; color: white; border: none; border-radius: 8px; font-size: 13px; cursor: pointer; transition: all 0.3s;">
                🗑️ 清空数据
            </button>
        </div>
    `;

    // Panel container styling: fixed to the top-right corner, above page content.
    panel.style.cssText = `
        position: fixed;
        top: 100px;
        right: 20px;
        width: 280px;
        background: white;
        border-radius: 10px;
        box-shadow: 0 10px 40px rgba(0,0,0,0.2);
        z-index: 999999;
        font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
    `;

    document.body.appendChild(panel);

    // Hover/active effects need a stylesheet; inline styles cannot express pseudo-classes.
    const style = document.createElement('style');
    style.textContent = `
        #xhs-collector-panel button:hover {
            transform: translateY(-2px);
            box-shadow: 0 4px 12px rgba(0,0,0,0.15);
        }
        #xhs-collector-panel button:active {
            transform: translateY(0);
        }
    `;
    document.head.appendChild(style);

    // Button wiring.
    document.getElementById('xhs-collector-toggle').addEventListener('click', () => {
        if (isCollecting) {
            stopCollecting();
        } else {
            startCollecting();
        }
    });

    document.getElementById('xhs-collector-export').addEventListener('click', exportJSON);
    document.getElementById('xhs-collector-clear').addEventListener('click', clearData);

    // Dragging: remember where inside the panel the pointer grabbed it, then keep
    // that offset constant while the mouse moves.
    let isDragging = false;
    let grabOffsetX = 0;
    let grabOffsetY = 0;

    // The first <div> of the panel is the colored header that acts as drag handle.
    const header = panel.querySelector('div');
    header.addEventListener('mousedown', (e) => {
        // Only the primary button starts a drag; right/middle clicks are left alone.
        if (e.button !== 0) {
            return;
        }
        isDragging = true;
        grabOffsetX = e.clientX - panel.offsetLeft;
        grabOffsetY = e.clientY - panel.offsetTop;
    });

    document.addEventListener('mousemove', (e) => {
        if (!isDragging) {
            return;
        }
        e.preventDefault();
        panel.style.left = (e.clientX - grabOffsetX) + 'px';
        panel.style.top = (e.clientY - grabOffsetY) + 'px';
        // The panel starts out right-anchored; release that so `left` takes effect.
        panel.style.right = 'auto';
    });

    document.addEventListener('mouseup', () => {
        isDragging = false;
    });

    console.log('数据采集器控制面板已创建');
}
307
308 // 监听DOM变化
// Debounced rescan: a burst of DOM mutations results in a single scan one second
// after the burst ends, and only while a collection session is running.
const scheduleScan = debounce(() => {
    if (isCollecting) {
        scanAndCollect();
    }
}, 1000);

// Watches the page for newly rendered content. Mutations that happen entirely inside
// the collector's own panel are ignored: updateUI() rewrites the counter and status
// text, and reacting to those writes would make every scan schedule another scan.
const observer = new MutationObserver((mutations) => {
    const panel = document.getElementById('xhs-collector-panel');
    const touchesPage = mutations.some(({ target }) => !panel || !panel.contains(target));
    if (touchesPage) {
        scheduleScan();
    }
});
314
315 // 初始化
/**
 * Entry point of the userscript.
 *
 * Once the DOM is available it schedules creation of the control panel (after a
 * one-second delay) and starts observing `document.body` for added or removed nodes.
 * Both steps wait for `DOMContentLoaded` when the document is still loading, because
 * `document.body` may not exist yet and MutationObserver.observe() rejects a
 * non-Node target.
 */
function init() {
    console.log('小红书批量数据采集器已启动');

    const start = () => {
        setTimeout(createControlPanel, 1000);

        // Watch the whole body subtree for content changes.
        observer.observe(document.body, {
            childList: true,
            subtree: true
        });
    };

    if (document.readyState === 'loading') {
        document.addEventListener('DOMContentLoaded', start, { once: true });
    } else {
        start();
    }
}
334
// Bootstrap: run init() as soon as the userscript body is evaluated.
335 init();
336})();