Competitor Intelligence Crawler

Crawl toàn bộ nội dung text của website đối thủ và xuất ra file để phân tích bằng AI

Size

16.8 KB

Version

1.0.1

Created

Mar 28, 2026

Updated

19 days ago

1// ==UserScript==
2// @name		Competitor Intelligence Crawler
3// @description		Crawl toàn bộ nội dung text của website đối thủ và xuất ra file để phân tích bằng AI
4// @version		1.0.1
5// @match		https://*/*
6// @icon		https://www.google.com/s2/favicons?domain=google.com&sz=64
7// ==/UserScript==
8(function () {
9    'use strict';
10
11    // ===================== CONFIG =====================
12    const SHOP_BASE_URL = 'https://btfashion.vn/shop/';
13    const PRODUCTS_PER_PAGE = 12; // mặc định của site
14
15    // ===================== STATE =====================
16    let isRunning = false;
17    let allProducts = [];
18    let totalPages = 0;
19    let currentPage = 0;
20    let statusEl, progressEl, btnExport, btnCancel;
21
22    // ===================== UI =====================
23    function createUI() {
24        // Tạo floating button
25        const container = document.createElement('div');
26        container.id = 'btf-exporter';
27        container.style.cssText = `
28            position: fixed;
29            bottom: 30px;
30            right: 30px;
31            z-index: 99999;
32            font-family: Arial, sans-serif;
33            display: flex;
34            flex-direction: column;
35            align-items: flex-end;
36            gap: 8px;
37        `;
38
39        // Panel chính
40        const panel = document.createElement('div');
41        panel.id = 'btf-panel';
42        panel.style.cssText = `
43            background: #1a1a2e;
44            border: 2px solid #e94560;
45            border-radius: 12px;
46            padding: 16px 20px;
47            width: 280px;
48            box-shadow: 0 8px 32px rgba(0,0,0,0.4);
49            display: none;
50            flex-direction: column;
51            gap: 10px;
52        `;
53
54        panel.innerHTML = `
55            <div style="display:flex; align-items:center; justify-content:space-between;">
56                <span style="color:#e94560; font-weight:bold; font-size:14px;">📊 BT Fashion Exporter</span>
57                <span id="btf-close" style="color:#aaa; cursor:pointer; font-size:18px; line-height:1;"></span>
58            </div>
59            <div style="color:#ccc; font-size:12px; line-height:1.5;">
60                Xuất toàn bộ sản phẩm ra file Excel gồm:<br>
61                tên, giá, danh mục, SKU, size, link, ảnh
62            </div>
63            <div id="btf-status" style="color:#f0c040; font-size:12px; min-height:16px;"></div>
64            <div id="btf-progress-wrap" style="background:#333; border-radius:6px; height:8px; overflow:hidden; display:none;">
65                <div id="btf-progress-bar" style="height:100%; width:0%; background:#e94560; transition:width 0.3s;"></div>
66            </div>
67            <div id="btf-progress-text" style="color:#aaa; font-size:11px; text-align:center; display:none;"></div>
68            <div style="display:flex; gap:8px; margin-top:4px;">
69                <button id="btf-btn-export" style="
70                    flex:1; background:#e94560; color:#fff; border:none; border-radius:8px;
71                    padding:9px 0; font-size:13px; font-weight:bold; cursor:pointer;
72                ">🚀 Bắt đầu xuất</button>
73                <button id="btf-btn-cancel" style="
74                    flex:1; background:#555; color:#fff; border:none; border-radius:8px;
75                    padding:9px 0; font-size:13px; cursor:pointer; display:none;
76                ">⛔ Dừng</button>
77            </div>
78        `;
79
80        // Nút mở panel
81        const toggleBtn = document.createElement('button');
82        toggleBtn.id = 'btf-toggle';
83        toggleBtn.innerHTML = '📊';
84        toggleBtn.title = 'BT Fashion Exporter';
85        toggleBtn.style.cssText = `
86            background: #e94560;
87            color: #fff;
88            border: none;
89            border-radius: 50%;
90            width: 52px;
91            height: 52px;
92            font-size: 22px;
93            cursor: pointer;
94            box-shadow: 0 4px 16px rgba(233,69,96,0.5);
95            display: flex;
96            align-items: center;
97            justify-content: center;
98        `;
99
100        container.appendChild(panel);
101        container.appendChild(toggleBtn);
102        document.body.appendChild(container);
103
104        // Refs
105        statusEl = document.getElementById('btf-status');
106        progressEl = document.getElementById('btf-progress-bar');
107        btnExport = document.getElementById('btf-btn-export');
108        btnCancel = document.getElementById('btf-btn-cancel');
109
110        // Events
111        toggleBtn.addEventListener('click', () => {
112            const p = document.getElementById('btf-panel');
113            p.style.display = p.style.display === 'none' ? 'flex' : 'none';
114        });
115
116        document.getElementById('btf-close').addEventListener('click', () => {
117            document.getElementById('btf-panel').style.display = 'none';
118        });
119
120        btnExport.addEventListener('click', startExport);
121        btnCancel.addEventListener('click', stopExport);
122
123        console.log('[BTF Exporter] UI created');
124    }
125
126    // ===================== CORE LOGIC =====================
127
128    function setStatus(msg) {
129        if (statusEl) statusEl.textContent = msg;
130        console.log('[BTF Exporter]', msg);
131    }
132
133    function setProgress(current, total) {
134        const pct = total > 0 ? Math.round((current / total) * 100) : 0;
135        if (progressEl) progressEl.style.width = pct + '%';
136        const progressText = document.getElementById('btf-progress-text');
137        if (progressText) progressText.textContent = `${current} / ${total} trang (${pct}%)`;
138    }
139
140    function showProgress(show) {
141        const wrap = document.getElementById('btf-progress-wrap');
142        const text = document.getElementById('btf-progress-text');
143        if (wrap) wrap.style.display = show ? 'block' : 'none';
144        if (text) text.style.display = show ? 'block' : 'none';
145    }
146
147    async function fetchPage(pageNum) {
148        const url = pageNum === 1
149            ? `${SHOP_BASE_URL}`
150            : `${SHOP_BASE_URL}page/${pageNum}/`;
151        console.log(`[BTF Exporter] Fetching page ${pageNum}: ${url}`);
152        const response = await fetch(url);
153        const html = await response.text();
154        const parser = new DOMParser();
155        return parser.parseFromString(html, 'text/html');
156    }
157
158    function getTotalPages(doc) {
159        let maxPage = 1;
160        const pageNumbers = doc.querySelectorAll('.page-numbers a.page-number');
161        pageNumbers.forEach(p => {
162            const n = parseInt(p.textContent.trim());
163            if (!isNaN(n) && n > maxPage) maxPage = n;
164        });
165        console.log(`[BTF Exporter] Total pages detected: ${maxPage}`);
166        return maxPage;
167    }
168
169    function parseProductsFromPage(doc) {
170        const products = [];
171        const productEls = doc.querySelectorAll('.product');
172        productEls.forEach(el => {
173            try {
174                const linkEl = el.querySelector('a.woocommerce-loop-product__link, a[href*="/product/"]');
175                const titleEl = el.querySelector('.woocommerce-loop-product__title, .product-title');
176                const priceEls = el.querySelectorAll('.price .woocommerce-Price-amount bdi');
177                const imgEl = el.querySelector('img.wp-post-image, img.attachment-woocommerce_thumbnail');
178                const badgeEl = el.querySelector('.onsale');
179                const categoryEl = el.querySelector('.product-category a, .category');
180
181                // Lấy giá gốc và giá sale
182                let regularPrice = '';
183                let salePrice = '';
184                if (priceEls.length === 1) {
185                    regularPrice = priceEls[0].textContent.trim();
186                } else if (priceEls.length >= 2) {
187                    regularPrice = priceEls[0].textContent.trim();
188                    salePrice = priceEls[1].textContent.trim();
189                }
190
191                products.push({
192                    name: titleEl?.textContent?.trim() || '',
193                    link: linkEl?.href || '',
194                    regularPrice,
195                    salePrice,
196                    discount: badgeEl?.textContent?.trim() || '',
197                    image: imgEl?.src || '',
198                    category: categoryEl?.textContent?.trim() || '',
199                    sku: '',
200                    sizes: '',
201                    description: '',
202                });
203            } catch (e) {
204                console.error('[BTF Exporter] Error parsing product:', e);
205            }
206        });
207        console.log(`[BTF Exporter] Parsed ${products.length} products from page`);
208        return products;
209    }
210
211    async function enrichProduct(product) {
212        if (!product.link) return product;
213        try {
214            const response = await fetch(product.link);
215            const html = await response.text();
216            const parser = new DOMParser();
217            const doc = parser.parseFromString(html, 'text/html');
218
219            // SKU
220            const skuEl = doc.querySelector('.sku');
221            product.sku = skuEl?.textContent?.trim() || '';
222
223            // Danh mục chi tiết
224            const catEl = doc.querySelector('.posted_in');
225            if (catEl) {
226                product.category = catEl.textContent.replace('Danh mục:', '').trim();
227            }
228
229            // Tags
230            const tagEl = doc.querySelector('.tagged_as');
231            product.tags = tagEl ? tagEl.textContent.replace('Thẻ:', '').trim() : '';
232
233            // Sizes
234            const sizeOptions = doc.querySelectorAll('select[name="attribute_pa_size"] option[value]:not([value=""])');
235            if (sizeOptions.length > 0) {
236                product.sizes = [...sizeOptions].map(o => o.textContent.trim()).join(', ');
237            }
238
239            // Colors
240            const colorOptions = doc.querySelectorAll('select[name="attribute_pa_color"] option[value]:not([value=""])');
241            if (colorOptions.length > 0) {
242                product.colors = [...colorOptions].map(o => o.textContent.trim()).join(', ');
243            } else {
244                product.colors = '';
245            }
246
247            // Mô tả ngắn
248            const descEl = doc.querySelector('.woocommerce-product-details__short-description, .product-short-description');
249            product.description = descEl?.textContent?.trim()?.replace(/\s+/g, ' ')?.substring(0, 300) || '';
250
251            // Giá chính xác từ trang chi tiết
252            const priceEls = doc.querySelectorAll('.summary .price .woocommerce-Price-amount bdi');
253            if (priceEls.length === 1) {
254                product.regularPrice = priceEls[0].textContent.trim();
255                product.salePrice = '';
256            } else if (priceEls.length >= 2) {
257                product.regularPrice = priceEls[0].textContent.trim();
258                product.salePrice = priceEls[1].textContent.trim();
259            }
260
261        } catch (e) {
262            console.error('[BTF Exporter] Error enriching product:', product.link, e);
263        }
264        return product;
265    }
266
267    async function startExport() {
268        if (isRunning) return;
269        isRunning = true;
270        allProducts = [];
271        currentPage = 0;
272
273        btnExport.style.display = 'none';
274        btnCancel.style.display = 'block';
275        showProgress(true);
276        setStatus('Đang quét danh sách sản phẩm...');
277
278        try {
279            // Bước 1: Lấy tổng số trang
280            const firstDoc = await fetchPage(1);
281            totalPages = getTotalPages(firstDoc);
282            setStatus(`Tìm thấy ${totalPages} trang. Đang thu thập...`);
283
284            // Bước 2: Thu thập tất cả sản phẩm từ listing pages
285            const listingProducts = parseProductsFromPage(firstDoc);
286            allProducts.push(...listingProducts);
287            currentPage = 1;
288            setProgress(currentPage, totalPages);
289
290            for (let page = 2; page <= totalPages; page++) {
291                if (!isRunning) break;
292                const doc = await fetchPage(page);
293                const products = parseProductsFromPage(doc);
294                allProducts.push(...products);
295                currentPage = page;
296                setProgress(currentPage, totalPages);
297                setStatus(`Đã thu thập ${allProducts.length} sản phẩm (trang ${page}/${totalPages})...`);
298                await sleep(300); // tránh spam server
299            }
300
301            if (!isRunning) {
302                setStatus('Đã dừng. Xuất dữ liệu đã thu thập...');
303            } else {
304                setStatus(`Thu thập xong ${allProducts.length} sản phẩm. Đang lấy chi tiết...`);
305            }
306
307            // Bước 3: Enrich từng sản phẩm (lấy SKU, size, mô tả...)
308            showProgress(true);
309            for (let i = 0; i < allProducts.length; i++) {
310                if (!isRunning && i > 0) break;
311                await enrichProduct(allProducts[i]);
312                if (progressEl) progressEl.style.width = Math.round(((i + 1) / allProducts.length) * 100) + '%';
313                const progressText = document.getElementById('btf-progress-text');
314                if (progressText) progressText.textContent = `Chi tiết: ${i + 1} / ${allProducts.length} sản phẩm`;
315                setStatus(`Đang lấy chi tiết ${i + 1}/${allProducts.length}...`);
316                await sleep(200);
317            }
318
319            setStatus('Hoàn tất! Đang tạo file Excel...');
320            exportToExcel(allProducts);
321            setStatus(`✅ Đã xuất ${allProducts.length} sản phẩm ra Excel!`);
322
323        } catch (e) {
324            console.error('[BTF Exporter] Export error:', e);
325            setStatus('❌ Lỗi: ' + e.message);
326        }
327
328        isRunning = false;
329        btnExport.style.display = 'block';
330        btnCancel.style.display = 'none';
331    }
332
333    function stopExport() {
334        isRunning = false;
335        setStatus('Đang dừng...');
336        btnCancel.style.display = 'none';
337        btnExport.style.display = 'block';
338    }
339
340    function sleep(ms) {
341        return new Promise(resolve => setTimeout(resolve, ms));
342    }
343
344    // ===================== EXCEL EXPORT =====================
345    function exportToExcel(products) {
346        console.log('[BTF Exporter] Exporting', products.length, 'products to Excel');
347
348        const headers = [
349            'STT',
350            'Tên sản phẩm',
351            'Giá gốc',
352            'Giá sale',
353            'Giảm giá',
354            'SKU',
355            'Danh mục',
356            'Tags',
357            'Sizes',
358            'Màu sắc',
359            'Mô tả ngắn',
360            'Link sản phẩm',
361            'Ảnh sản phẩm'
362        ];
363
364        const rows = products.map((p, i) => [
365            i + 1,
366            p.name || '',
367            p.regularPrice || '',
368            p.salePrice || '',
369            p.discount || '',
370            p.sku || '',
371            p.category || '',
372            p.tags || '',
373            p.sizes || '',
374            p.colors || '',
375            p.description || '',
376            p.link || '',
377            p.image || ''
378        ]);
379
380        const wsData = [headers, ...rows];
381        const wb = XLSX.utils.book_new();
382        const ws = XLSX.utils.aoa_to_sheet(wsData);
383
384        // Style cột
385        const colWidths = [6, 40, 18, 18, 10, 15, 30, 25, 20, 20, 60, 50, 60];
386        ws['!cols'] = colWidths.map(w => ({ wch: w }));
387
388        XLSX.utils.book_append_sheet(wb, ws, 'Sản phẩm BT Fashion');
389
390        // Sheet tổng hợp
391        const summaryData = [
392            ['BÁO CÁO PHÂN TÍCH SẢN PHẨM BT FASHION'],
393            [''],
394            ['Tổng số sản phẩm', products.length],
395            ['Sản phẩm đang giảm giá', products.filter(p => p.salePrice).length],
396            ['Ngày xuất', new Date().toLocaleDateString('vi-VN')],
397            ['Giờ xuất', new Date().toLocaleTimeString('vi-VN')],
398            [''],
399            ['PHÂN BỔ DANH MỤC'],
400        ];
401
402        // Đếm theo danh mục
403        const catCount = {};
404        products.forEach(p => {
405            const cats = (p.category || 'Chưa phân loại').split(',');
406            cats.forEach(c => {
407                const cat = c.trim();
408                catCount[cat] = (catCount[cat] || 0) + 1;
409            });
410        });
411        Object.entries(catCount).sort((a, b) => b[1] - a[1]).forEach(([cat, count]) => {
412            summaryData.push([cat, count]);
413        });
414
415        const wsSummary = XLSX.utils.aoa_to_sheet(summaryData);
416        wsSummary['!cols'] = [{ wch: 35 }, { wch: 15 }];
417        XLSX.utils.book_append_sheet(wb, wsSummary, 'Tổng hợp');
418
419        const filename = `BTFashion_Products_${new Date().toISOString().slice(0, 10)}.xlsx`;
420        XLSX.writeFile(wb, filename);
421        console.log('[BTF Exporter] File saved:', filename);
422    }
423
424    // ===================== INIT =====================
425    function init() {
426        console.log('[BTF Exporter] Initializing...');
427        createUI();
428        console.log('[BTF Exporter] Ready! Click the 📊 button to start.');
429    }
430
431    if (document.readyState === 'loading') {
432        document.addEventListener('DOMContentLoaded', init);
433    } else {
434        init();
435    }
436
437})();
Competitor Intelligence Crawler | Robomonkey