Crawl toàn bộ nội dung text của website đối thủ và xuất ra file để phân tích bằng AI
Size
16.8 KB
Version
1.0.1
Created
Mar 28, 2026
Updated
19 days ago
// ==UserScript==
// @name Competitor Intelligence Crawler
// @description Crawl toàn bộ nội dung text của website đối thủ và xuất ra file để phân tích bằng AI
// @version 1.0.1
// @match https://*/*
// @icon https://www.google.com/s2/favicons?domain=google.com&sz=64
// ==/UserScript==
8(function () {
9 'use strict';
10
// ===================== CONFIG =====================
// Root URL of the shop listing; fetchPage() appends `page/<n>/` for pages after the first.
const SHOP_BASE_URL = 'https://btfashion.vn/shop/';
// Default page size of the site (not referenced by the crawl logic visible in this script).
const PRODUCTS_PER_PAGE = 12;

// ===================== STATE =====================
// True while an export run is in progress; set to false to request cancellation.
let isRunning = false;
// Product records accumulated during the current run.
let allProducts = [];
// Number of listing pages detected for the current run.
let totalPages = 0;
// Last listing page that has been processed.
let currentPage = 0;
// Widget element references, assigned by createUI().
let statusEl;
let progressEl;
let btnExport;
let btnCancel;
21
22 // ===================== UI =====================
/**
 * Build the floating exporter widget (round toggle button plus a collapsible
 * panel), append it to the page and wire up its events.
 *
 * Side effects: appends `#btf-exporter` to `document.body` and assigns the
 * module-level `statusEl`, `progressEl`, `btnExport` and `btnCancel` references.
 * Does nothing when a `#btf-exporter` element already exists in the document.
 */
function createUI() {
    // Guard against a second injection into the same document: a duplicate
    // widget would reuse the same element ids and break the id-based lookups.
    if (document.getElementById('btf-exporter')) {
        console.log('[BTF Exporter] UI already exists, skipping');
        return;
    }

    // Outer container pinned to the bottom-right corner
    const container = document.createElement('div');
    container.id = 'btf-exporter';
    container.style.cssText = `
        position: fixed;
        bottom: 30px;
        right: 30px;
        z-index: 99999;
        font-family: Arial, sans-serif;
        display: flex;
        flex-direction: column;
        align-items: flex-end;
        gap: 8px;
    `;

    // Main panel (hidden until the toggle button is clicked)
    const panel = document.createElement('div');
    panel.id = 'btf-panel';
    panel.style.cssText = `
        background: #1a1a2e;
        border: 2px solid #e94560;
        border-radius: 12px;
        padding: 16px 20px;
        width: 280px;
        box-shadow: 0 8px 32px rgba(0,0,0,0.4);
        display: none;
        flex-direction: column;
        gap: 10px;
    `;

    // Static markup only: no page-derived data is interpolated here.
    panel.innerHTML = `
        <div style="display:flex; align-items:center; justify-content:space-between;">
            <span style="color:#e94560; font-weight:bold; font-size:14px;">📊 BT Fashion Exporter</span>
            <span id="btf-close" style="color:#aaa; cursor:pointer; font-size:18px; line-height:1;">✕</span>
        </div>
        <div style="color:#ccc; font-size:12px; line-height:1.5;">
            Xuất toàn bộ sản phẩm ra file Excel gồm:<br>
            tên, giá, danh mục, SKU, size, link, ảnh
        </div>
        <div id="btf-status" style="color:#f0c040; font-size:12px; min-height:16px;"></div>
        <div id="btf-progress-wrap" style="background:#333; border-radius:6px; height:8px; overflow:hidden; display:none;">
            <div id="btf-progress-bar" style="height:100%; width:0%; background:#e94560; transition:width 0.3s;"></div>
        </div>
        <div id="btf-progress-text" style="color:#aaa; font-size:11px; text-align:center; display:none;"></div>
        <div style="display:flex; gap:8px; margin-top:4px;">
            <button id="btf-btn-export" style="
                flex:1; background:#e94560; color:#fff; border:none; border-radius:8px;
                padding:9px 0; font-size:13px; font-weight:bold; cursor:pointer;
            ">🚀 Bắt đầu xuất</button>
            <button id="btf-btn-cancel" style="
                flex:1; background:#555; color:#fff; border:none; border-radius:8px;
                padding:9px 0; font-size:13px; cursor:pointer; display:none;
            ">⛔ Dừng</button>
        </div>
    `;

    // Round button that opens/closes the panel
    const toggleBtn = document.createElement('button');
    toggleBtn.id = 'btf-toggle';
    toggleBtn.textContent = '📊';
    toggleBtn.title = 'BT Fashion Exporter';
    toggleBtn.style.cssText = `
        background: #e94560;
        color: #fff;
        border: none;
        border-radius: 50%;
        width: 52px;
        height: 52px;
        font-size: 22px;
        cursor: pointer;
        box-shadow: 0 4px 16px rgba(233,69,96,0.5);
        display: flex;
        align-items: center;
        justify-content: center;
    `;

    container.appendChild(panel);
    container.appendChild(toggleBtn);
    document.body.appendChild(container);

    // Refs are resolved inside our own panel so that a host-page element that
    // happens to share one of these ids can never be picked up instead.
    statusEl = panel.querySelector('#btf-status');
    progressEl = panel.querySelector('#btf-progress-bar');
    btnExport = panel.querySelector('#btf-btn-export');
    btnCancel = panel.querySelector('#btf-btn-cancel');

    // Events
    toggleBtn.addEventListener('click', () => {
        panel.style.display = panel.style.display === 'none' ? 'flex' : 'none';
    });

    panel.querySelector('#btf-close').addEventListener('click', () => {
        panel.style.display = 'none';
    });

    btnExport.addEventListener('click', startExport);
    btnCancel.addEventListener('click', stopExport);

    console.log('[BTF Exporter] UI created');
}
125
126 // ===================== CORE LOGIC =====================
127
/**
 * Show a status message in the panel (when it has been created) and mirror it
 * to the console.
 *
 * @param {string} msg - Message to display.
 */
function setStatus(msg) {
    if (statusEl) {
        statusEl.textContent = msg;
    }
    console.log('[BTF Exporter]', msg);
}
132
/**
 * Update the progress bar width and the "x / y trang" caption.
 *
 * @param {number} current - Pages processed so far.
 * @param {number} total - Total number of pages; 0 or less is shown as 0%.
 */
function setProgress(current, total) {
    let pct = 0;
    if (total > 0) {
        pct = Math.round((current / total) * 100);
    }

    if (progressEl) {
        progressEl.style.width = `${pct}%`;
    }

    const caption = document.getElementById('btf-progress-text');
    if (caption) {
        caption.textContent = `${current} / ${total} trang (${pct}%)`;
    }
}
139
/**
 * Show or hide the progress bar wrapper and its caption.
 *
 * @param {boolean} show - Truthy to display both elements, falsy to hide them.
 */
function showProgress(show) {
    const display = show ? 'block' : 'none';
    for (const id of ['btf-progress-wrap', 'btf-progress-text']) {
        const el = document.getElementById(id);
        if (el) {
            el.style.display = display;
        }
    }
}
146
/**
 * Download one shop listing page and parse it into a detached document.
 *
 * @param {number} pageNum - 1-based page index; page 1 is the shop root,
 *   later pages live under `page/<n>/`.
 * @returns {Promise<Document>} The parsed HTML document.
 * @throws {Error} When the request fails or the server answers with a
 *   non-2xx status. Without this check an error page would be parsed as a
 *   listing and silently contribute zero products.
 */
async function fetchPage(pageNum) {
    const url = pageNum === 1
        ? `${SHOP_BASE_URL}`
        : `${SHOP_BASE_URL}page/${pageNum}/`;
    console.log(`[BTF Exporter] Fetching page ${pageNum}: ${url}`);

    const response = await fetch(url);
    if (!response.ok) {
        throw new Error(`HTTP ${response.status} khi tải ${url}`);
    }

    const html = await response.text();
    return new DOMParser().parseFromString(html, 'text/html');
}
157
/**
 * Determine how many listing pages exist by reading the pagination links.
 *
 * @param {Document} doc - Parsed listing page.
 * @returns {number} The highest page number found among the pagination
 *   links, or 1 when there are none.
 */
function getTotalPages(doc) {
    let maxPage = 1;
    for (const link of doc.querySelectorAll('.page-numbers a.page-number')) {
        // Keep digits only so that locale-grouped labels ("1.250", "1,250")
        // are read in full instead of stopping at the separator; labels with
        // no digits at all (arrows, ellipsis) become NaN and are skipped.
        const digits = link.textContent.replace(/\D/g, '');
        const n = Number.parseInt(digits, 10);
        if (!Number.isNaN(n) && n > maxPage) {
            maxPage = n;
        }
    }
    console.log(`[BTF Exporter] Total pages detected: ${maxPage}`);
    return maxPage;
}
168
/**
 * Extract one record per product card from a parsed shop listing page.
 *
 * @param {Document} doc - Parsed listing page.
 * @returns {Array<Object>} Records with `name`, `link`, `regularPrice`,
 *   `salePrice`, `discount`, `image` and `category` read from the card, plus
 *   empty `sku`, `tags`, `sizes`, `colors` and `description` placeholders that
 *   the detail-page pass fills in later. A card that throws while being read
 *   is logged and skipped.
 */
function parseProductsFromPage(doc) {
    const AMOUNT = '.woocommerce-Price-amount bdi';
    const textOf = (node) => node?.textContent?.trim() || '';
    const products = [];

    for (const el of doc.querySelectorAll('.product')) {
        try {
            const linkEl = el.querySelector('a.woocommerce-loop-product__link, a[href*="/product/"]');
            const titleEl = el.querySelector('.woocommerce-loop-product__title, .product-title');
            const imgEl = el.querySelector('img.wp-post-image, img.attachment-woocommerce_thumbnail');
            const badgeEl = el.querySelector('.onsale');
            const categoryEl = el.querySelector('.product-category a, .category');

            // A discount is marked up as <del>old</del> <ins>new</ins>. Two bare
            // amounts without that markup are a "from – to" range of a variable
            // product and must not be reported as a sale.
            const oldPriceEl = el.querySelector(`.price del ${AMOUNT}`);
            const newPriceEl = el.querySelector(`.price ins ${AMOUNT}`);
            const amounts = [...el.querySelectorAll(`.price ${AMOUNT}`)].map((node) => textOf(node));

            let regularPrice = '';
            let salePrice = '';
            if (oldPriceEl && newPriceEl) {
                regularPrice = textOf(oldPriceEl);
                salePrice = textOf(newPriceEl);
            } else if (amounts.length >= 2) {
                regularPrice = `${amounts[0]} – ${amounts[1]}`;
            } else if (amounts.length === 1) {
                regularPrice = amounts[0];
            }

            products.push({
                name: textOf(titleEl),
                link: linkEl?.href || '',
                regularPrice,
                salePrice,
                discount: textOf(badgeEl),
                image: imgEl?.src || '',
                category: textOf(categoryEl),
                sku: '',
                tags: '',
                sizes: '',
                colors: '',
                description: '',
            });
        } catch (e) {
            console.error('[BTF Exporter] Error parsing product:', e);
        }
    }

    console.log(`[BTF Exporter] Parsed ${products.length} products from page`);
    return products;
}
210
/**
 * Fetch a product's detail page and fill in the fields the listing card does
 * not carry: SKU, category, tags, sizes, colors, short description and the
 * detail-page prices.
 *
 * The record is updated in place. This is best-effort: any failure (network
 * error, non-2xx status, parse error) is logged and the record is returned
 * with whatever it already had.
 *
 * @param {Object} product - Record produced by the listing pass; must carry `link`.
 * @returns {Promise<Object>} The same `product` object.
 */
async function enrichProduct(product) {
    if (!product.link) return product;

    const AMOUNT = '.woocommerce-Price-amount bdi';
    const textOf = (node) => node?.textContent?.trim() || '';

    try {
        const response = await fetch(product.link);
        if (!response.ok) {
            // Do not parse an error page as if it were the product page.
            throw new Error(`HTTP ${response.status}`);
        }
        const html = await response.text();
        const doc = new DOMParser().parseFromString(html, 'text/html');

        // SKU
        product.sku = textOf(doc.querySelector('.sku'));

        // Detailed category list (keeps the listing value when absent)
        const catEl = doc.querySelector('.posted_in');
        if (catEl) {
            product.category = catEl.textContent.replace('Danh mục:', '').trim();
        }

        // Tags
        const tagEl = doc.querySelector('.tagged_as');
        product.tags = tagEl ? tagEl.textContent.replace('Thẻ:', '').trim() : '';

        // Sizes (keeps the existing value when the page offers no size options)
        const sizeOptions = doc.querySelectorAll('select[name="attribute_pa_size"] option[value]:not([value=""])');
        if (sizeOptions.length > 0) {
            product.sizes = [...sizeOptions].map((o) => o.textContent.trim()).join(', ');
        }

        // Colors
        const colorOptions = doc.querySelectorAll('select[name="attribute_pa_color"] option[value]:not([value=""])');
        product.colors = colorOptions.length > 0
            ? [...colorOptions].map((o) => o.textContent.trim()).join(', ')
            : '';

        // Short description, whitespace-collapsed and capped at 300 characters
        const descEl = doc.querySelector('.woocommerce-product-details__short-description, .product-short-description');
        product.description = descEl?.textContent?.trim()?.replace(/\s+/g, ' ')?.substring(0, 300) || '';

        // Prices from the detail page. A discount is marked up as
        // <del>old</del> <ins>new</ins>; two bare amounts are a "from – to"
        // range and must not be reported as a sale. With no amounts at all
        // the listing prices are left untouched.
        const oldPriceEl = doc.querySelector(`.summary .price del ${AMOUNT}`);
        const newPriceEl = doc.querySelector(`.summary .price ins ${AMOUNT}`);
        const amounts = [...doc.querySelectorAll(`.summary .price ${AMOUNT}`)].map((node) => textOf(node));
        if (oldPriceEl && newPriceEl) {
            product.regularPrice = textOf(oldPriceEl);
            product.salePrice = textOf(newPriceEl);
        } else if (amounts.length >= 2) {
            product.regularPrice = `${amounts[0]} – ${amounts[1]}`;
            product.salePrice = '';
        } else if (amounts.length === 1) {
            product.regularPrice = amounts[0];
            product.salePrice = '';
        }
    } catch (e) {
        console.error('[BTF Exporter] Error enriching product:', product.link, e);
    }
    return product;
}
266
/**
 * Run a full export: crawl every listing page, enrich each product from its
 * detail page, then write the Excel file.
 *
 * A second call while a run is active is ignored. Setting `isRunning` to
 * false during the run cancels it: crawling and enrichment stop at the next
 * loop iteration and whatever has been collected so far is still exported.
 * Errors are reported through the status line; the buttons and `isRunning`
 * are always restored when the run ends.
 *
 * @returns {Promise<void>}
 */
async function startExport() {
    if (isRunning) return;
    isRunning = true;
    allProducts = [];
    currentPage = 0;

    btnExport.style.display = 'none';
    btnCancel.style.display = 'block';
    showProgress(true);
    setStatus('Đang quét danh sách sản phẩm...');

    try {
        // Step 1: read the page count from the first listing page. A failure
        // here is fatal because nothing can be crawled without it.
        const firstDoc = await fetchPage(1);
        totalPages = getTotalPages(firstDoc);
        setStatus(`Tìm thấy ${totalPages} trang. Đang thu thập...`);

        // Step 2: collect the products of every listing page
        allProducts.push(...parseProductsFromPage(firstDoc));
        currentPage = 1;
        setProgress(currentPage, totalPages);

        for (let page = 2; page <= totalPages; page++) {
            if (!isRunning) break;
            try {
                const doc = await fetchPage(page);
                allProducts.push(...parseProductsFromPage(doc));
            } catch (pageError) {
                // One bad page must not throw away everything collected so far.
                console.error(`[BTF Exporter] Skipping page ${page}:`, pageError);
            }
            currentPage = page;
            setProgress(currentPage, totalPages);
            setStatus(`Đã thu thập ${allProducts.length} sản phẩm (trang ${page}/${totalPages})...`);
            await sleep(300); // avoid hammering the server
        }

        if (!isRunning) {
            setStatus('Đã dừng. Xuất dữ liệu đã thu thập...');
        } else {
            setStatus(`Thu thập xong ${allProducts.length} sản phẩm. Đang lấy chi tiết...`);
        }

        // Step 3: enrich each product (SKU, sizes, description, ...)
        showProgress(true);
        for (let i = 0; i < allProducts.length; i++) {
            // Stop immediately once cancelled; no further detail requests.
            if (!isRunning) break;
            await enrichProduct(allProducts[i]);
            if (progressEl) progressEl.style.width = Math.round(((i + 1) / allProducts.length) * 100) + '%';
            const progressText = document.getElementById('btf-progress-text');
            if (progressText) progressText.textContent = `Chi tiết: ${i + 1} / ${allProducts.length} sản phẩm`;
            setStatus(`Đang lấy chi tiết ${i + 1}/${allProducts.length}...`);
            await sleep(200);
        }

        setStatus('Hoàn tất! Đang tạo file Excel...');
        exportToExcel(allProducts);
        setStatus(`✅ Đã xuất ${allProducts.length} sản phẩm ra Excel!`);
    } catch (e) {
        console.error('[BTF Exporter] Export error:', e);
        setStatus('❌ Lỗi: ' + e.message);
    } finally {
        // Always hand control back to the user, whatever happened above.
        isRunning = false;
        btnExport.style.display = 'block';
        btnCancel.style.display = 'none';
    }
}
332
/**
 * Request cancellation of the running export.
 *
 * Only the cancel button is hidden here. The start button is deliberately NOT
 * shown again: the cancelled run is still unwinding, and starting a new run at
 * that point would set `isRunning` back to true and revive the old loops,
 * leaving two runs sharing `allProducts`. startExport() restores the start
 * button itself once the run has actually finished.
 */
function stopExport() {
    isRunning = false;
    setStatus('Đang dừng...');
    btnCancel.style.display = 'none';
}
339
/**
 * Promise-based delay.
 *
 * @param {number} ms - Delay in milliseconds, passed to setTimeout.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function sleep(ms) {
    return new Promise((done) => {
        setTimeout(done, ms);
    });
}
343
344 // ===================== EXCEL EXPORT =====================
/**
 * Write the collected products to an .xlsx workbook with two sheets: the
 * product table and a summary (totals, export date/time, products per
 * category), then trigger the download through `XLSX.writeFile`.
 *
 * @param {Array<Object>} products - Product records; missing fields are
 *   exported as empty strings.
 * @throws {Error} When the global `XLSX` (SheetJS) library is not available.
 */
function exportToExcel(products) {
    if (typeof XLSX === 'undefined') {
        // Fail with an actionable message instead of a bare ReferenceError.
        throw new Error('Thư viện XLSX (SheetJS) chưa được nạp, không thể tạo file Excel');
    }
    console.log('[BTF Exporter] Exporting', products.length, 'products to Excel');

    // One timestamp for the whole export so every date shown agrees.
    const now = new Date();

    const headers = [
        'STT',
        'Tên sản phẩm',
        'Giá gốc',
        'Giá sale',
        'Giảm giá',
        'SKU',
        'Danh mục',
        'Tags',
        'Sizes',
        'Màu sắc',
        'Mô tả ngắn',
        'Link sản phẩm',
        'Ảnh sản phẩm',
    ];

    const rows = products.map((p, i) => [
        i + 1,
        p.name || '',
        p.regularPrice || '',
        p.salePrice || '',
        p.discount || '',
        p.sku || '',
        p.category || '',
        p.tags || '',
        p.sizes || '',
        p.colors || '',
        p.description || '',
        p.link || '',
        p.image || '',
    ]);

    const wb = XLSX.utils.book_new();
    const ws = XLSX.utils.aoa_to_sheet([headers, ...rows]);

    // Column widths, one entry per header above
    ws['!cols'] = [6, 40, 18, 18, 10, 15, 30, 25, 20, 20, 60, 50, 60].map((wch) => ({ wch }));
    XLSX.utils.book_append_sheet(wb, ws, 'Sản phẩm BT Fashion');

    // Summary sheet
    const summaryData = [
        ['BÁO CÁO PHÂN TÍCH SẢN PHẨM BT FASHION'],
        [''],
        ['Tổng số sản phẩm', products.length],
        ['Sản phẩm đang giảm giá', products.filter((p) => p.salePrice).length],
        ['Ngày xuất', now.toLocaleDateString('vi-VN')],
        ['Giờ xuất', now.toLocaleTimeString('vi-VN')],
        [''],
        ['PHÂN BỔ DANH MỤC'],
    ];

    // Count products per category. A Map is used because category names come
    // from the crawled pages: a plain object would collide with inherited
    // keys such as "constructor" and would reorder numeric-looking names.
    const catCount = new Map();
    for (const p of products) {
        for (const rawCat of (p.category || 'Chưa phân loại').split(',')) {
            const cat = rawCat.trim();
            if (!cat) continue; // stray/trailing comma
            catCount.set(cat, (catCount.get(cat) || 0) + 1);
        }
    }
    [...catCount]
        .sort((a, b) => b[1] - a[1])
        .forEach(([cat, count]) => {
            summaryData.push([cat, count]);
        });

    const wsSummary = XLSX.utils.aoa_to_sheet(summaryData);
    wsSummary['!cols'] = [{ wch: 35 }, { wch: 15 }];
    XLSX.utils.book_append_sheet(wb, wsSummary, 'Tổng hợp');

    // Local calendar date, matching the local date written to the summary
    // sheet (toISOString() would give the UTC date instead).
    const pad = (n) => String(n).padStart(2, '0');
    const stamp = `${now.getFullYear()}-${pad(now.getMonth() + 1)}-${pad(now.getDate())}`;
    const filename = `BTFashion_Products_${stamp}.xlsx`;
    XLSX.writeFile(wb, filename);
    console.log('[BTF Exporter] File saved:', filename);
}
423
424 // ===================== INIT =====================
/**
 * Script entry point: builds the widget and logs start-up progress.
 */
function init() {
    console.log('[BTF Exporter] Initializing...');

    createUI();

    console.log('[BTF Exporter] Ready! Click the 📊 button to start.');
}
430
// Run init() right away when the DOM is already parsed; otherwise wait for it.
if (document.readyState !== 'loading') {
    init();
} else {
    document.addEventListener('DOMContentLoaded', init);
}
436
437})();