Amazon Product Info Scraper

Scrapes product information including image, price, reviews, ratings, title, and size from Amazon product pages

Size

21.6 KB

Version

1.1.1

Created

Oct 30, 2025

Updated

about 1 month ago

1// ==UserScript==
2// @name		Amazon Product Info Scraper
3// @description		Scrapes product information including image, price, reviews, ratings, title, and size from Amazon product pages
4// @version		1.1.1
5// @match		https://*.amazon.com/*
6// @icon		https://www.perplexity.ai/favicon.ico
7// ==/UserScript==
8(function() {
9    'use strict';
10
/**
 * Wrap `func` so that a burst of calls collapses into a single trailing call.
 *
 * Every invocation of the returned wrapper cancels the pending timer (if any)
 * and starts a new one, so `func` runs once, `wait` milliseconds after the
 * most recent invocation, with that invocation's arguments and `this` value.
 *
 * @param {Function} func - Function to run after the quiet period.
 * @param {number} wait - Quiet period in milliseconds.
 * @returns {Function} Debounced wrapper; the wrapper itself returns nothing.
 */
function debounce(func, wait) {
    let timeout;
    return function executedFunction(...args) {
        clearTimeout(timeout);
        // The arrow callback captures the wrapper's `this`, so a debounced
        // method still sees its receiver when it finally runs.
        timeout = setTimeout(() => {
            timeout = undefined;
            func.apply(this, args);
        }, wait);
    };
}
23
/**
 * Collect the details of the product shown on the current product page.
 *
 * Each field is looked up with its own CSS selector via `document.querySelector`;
 * a field whose element is missing stays `null`. Every hit is logged to the console.
 *
 * @returns {{title: ?string, mainImage: ?string, price: ?string, rating: ?string,
 *            reviewCount: ?string, size: ?string}} The scraped fields.
 */
function scrapeProductInfo() {
    console.log('Scraping Amazon product information...');

    // Most fields are simply the trimmed text of the matched element.
    const trimmedText = (node) => node.textContent.trim();

    // [result key, log label, selector, extractor] — processed in this order.
    const fieldSpecs = [
        ['title', 'Title found:', '#productTitle', trimmedText],
        [
            'mainImage',
            'Image found:',
            '#landingImage, #imgBlkFront, #main-image-container img',
            // Prefer the live src; fall back to the data-old-hires attribute.
            (node) => node.src || node.getAttribute('data-old-hires'),
        ],
        ['price', 'Price found:', '.a-price .a-offscreen', trimmedText],
        ['rating', 'Rating found:', '[data-hook="rating-out-of-text"]', trimmedText],
        ['reviewCount', 'Review count found:', '#acrCustomerReviewText', trimmedText],
        [
            'size',
            'Size found:',
            '#variation_size_name .selection, .po-size .a-span9, [class*="size"] .selection',
            trimmedText,
        ],
    ];

    const productData = {
        title: null,
        mainImage: null,
        price: null,
        rating: null,
        reviewCount: null,
        size: null
    };

    for (const [key, logLabel, selector, extract] of fieldSpecs) {
        const node = document.querySelector(selector);
        if (!node) {
            continue;
        }
        productData[key] = extract(node);
        console.log(logLabel, productData[key]);
    }

    return productData;
}
81
/**
 * Collect every product card on a search results page.
 *
 * Cards are the elements matching `[data-component-type="s-search-result"]`.
 * A card without a title is skipped; all other fields default to `null`
 * when their element is missing.
 *
 * @returns {Array<{title: ?string, mainImage: ?string, price: ?string, rating: ?string,
 *                  reviewCount: ?string, productUrl: ?string, asin: ?string}>}
 *          One entry per titled card, in document order.
 */
function scrapeSearchResults() {
    console.log('Scraping Amazon search results...');

    // Resolve an href against the page's own origin. This handles relative,
    // protocol-relative ("//host/...") and absolute hrefs alike, and keeps
    // links on the subdomain the user is actually browsing.
    const toAbsoluteUrl = (href) => {
        try {
            return new URL(href, window.location.origin).href;
        } catch (error) {
            console.warn('Could not resolve product URL:', href, error);
            return null;
        }
    };

    const products = [];
    const productElements = document.querySelectorAll('[data-component-type="s-search-result"]');

    console.log(`Found ${productElements.length} products on search page`);

    productElements.forEach((productElement, index) => {
        const productData = {
            title: null,
            mainImage: null,
            price: null,
            rating: null,
            reviewCount: null,
            productUrl: null,
            asin: null
        };

        productData.asin = productElement.getAttribute('data-asin');

        const titleElement = productElement.querySelector('h2 a span, h2 span');
        if (titleElement) {
            productData.title = titleElement.textContent.trim();
        }

        const linkElement = productElement.querySelector('h2 a[href*="/dp/"], a.s-no-outline[href*="/dp/"]');
        if (linkElement) {
            productData.productUrl = toAbsoluteUrl(linkElement.getAttribute('href'));
        }

        const imageElement = productElement.querySelector('img.s-image');
        if (imageElement) {
            // NOTE(review): data-image-latency looks like a marker attribute rather
            // than an image URL — confirm this fallback is useful.
            productData.mainImage = imageElement.src || imageElement.getAttribute('data-image-latency');
        }

        const priceElement = productElement.querySelector('.a-price .a-offscreen');
        if (priceElement) {
            productData.price = priceElement.textContent.trim();
        }

        // The star-rating node supplies the rating (its aria-label) and anchors
        // the review count (text of the element following its parent).
        const ratingElement = productElement.querySelector('[aria-label*="out of 5 stars"]');
        if (ratingElement) {
            productData.rating = ratingElement.getAttribute('aria-label');

            const reviewText = ratingElement.parentElement?.nextElementSibling?.textContent;
            if (reviewText) {
                productData.reviewCount = reviewText.trim();
            }
        }

        // Only keep cards that have at least a title.
        if (productData.title) {
            products.push(productData);
            console.log(`Product ${index + 1} scraped:`, productData.title);
        }
    });

    console.log(`Total products scraped: ${products.length}`);
    return products;
}
155
/**
 * Build the floating scraper panel and attach it to the page.
 *
 * Does nothing if an element with id `amazon-scraper-ui` already exists.
 * The panel holds a header with a close button, a scrape button whose label
 * and action depend on the page type, and an empty `#scraper-results` area
 * that the display functions fill in.
 *
 * @returns {void}
 */
function createScraperUI() {
    if (document.getElementById('amazon-scraper-ui')) {
        console.log('Scraper UI already exists');
        return;
    }

    console.log('Creating scraper UI...');

    // A product page wins over the search heuristics, so a product URL that
    // happens to satisfy the looser search checks still gets product mode.
    const isSearchPage = !isProductPage() && isSearchResultsPage();

    const container = document.createElement('div');
    container.id = 'amazon-scraper-ui';
    container.style.cssText = `
        position: fixed;
        top: 20px;
        right: 20px;
        width: 400px;
        background: white;
        border: 2px solid #232f3e;
        border-radius: 8px;
        box-shadow: 0 4px 12px rgba(0,0,0,0.15);
        z-index: 10000;
        font-family: Arial, sans-serif;
        max-height: 90vh;
        overflow-y: auto;
    `;

    const header = document.createElement('div');
    header.style.cssText = `
        background: #232f3e;
        color: white;
        padding: 12px 15px;
        font-weight: bold;
        font-size: 16px;
        display: flex;
        justify-content: space-between;
        align-items: center;
        border-radius: 6px 6px 0 0;
    `;

    const headerTitle = document.createElement('span');
    headerTitle.textContent = 'Amazon Product Scraper';

    const closeButton = document.createElement('button');
    closeButton.id = 'scraper-close-btn';
    closeButton.textContent = '×';
    closeButton.style.cssText = 'background: transparent; border: none; color: white; font-size: 20px; cursor: pointer; padding: 0; width: 24px; height: 24px;';

    header.appendChild(headerTitle);
    header.appendChild(closeButton);

    const content = document.createElement('div');
    content.id = 'scraper-content';
    content.style.cssText = `
        padding: 15px;
    `;

    const scrapeButton = document.createElement('button');
    scrapeButton.id = 'scrape-btn';
    scrapeButton.textContent = isSearchPage ? 'Scrape All Products' : 'Scrape Product Info';
    scrapeButton.style.cssText = `
        width: 100%;
        padding: 12px;
        background: #ff9900;
        color: #111;
        border: none;
        border-radius: 4px;
        font-size: 14px;
        font-weight: bold;
        cursor: pointer;
        margin-bottom: 15px;
    `;
    scrapeButton.onmouseover = () => { scrapeButton.style.background = '#ffad33'; };
    scrapeButton.onmouseout = () => { scrapeButton.style.background = '#ff9900'; };

    const results = document.createElement('div');
    results.id = 'scraper-results';
    results.style.cssText = `
        font-size: 13px;
        line-height: 1.6;
    `;

    content.appendChild(scrapeButton);
    content.appendChild(results);
    container.appendChild(header);
    container.appendChild(content);
    document.body.appendChild(container);

    // Listeners go on the element references directly; looking the buttons up
    // by id could hit an unrelated page element that shares the id.
    closeButton.addEventListener('click', () => {
        // Hide rather than remove: the panel is re-created whenever the
        // 'amazon-scraper-ui' element is missing, so removing it would make it
        // pop back up after the next DOM mutation.
        container.style.display = 'none';
    });

    scrapeButton.addEventListener('click', () => {
        if (isSearchPage) {
            displaySearchResults();
        } else {
            displayScrapedData();
        }
    });

    console.log('Scraper UI created successfully');
}
260
/**
 * Scrape the current product page and render the result into `#scraper-results`.
 *
 * Renders the image (when found), one card per field (missing fields show
 * "Not found"), and a button that copies a plain-text summary to the clipboard.
 * Logs an error and returns if the results container is not in the document.
 *
 * @returns {void}
 */
function displayScrapedData() {
    const data = scrapeProductInfo();
    const resultsDiv = document.getElementById('scraper-results');

    if (!resultsDiv) {
        console.error('Results div not found');
        return;
    }

    // Scraped values are page text; escape them before building markup so a
    // title containing "<" or quotes cannot inject HTML into the panel.
    const escapeHtml = (value) => String(value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');

    // One white card holding a label and its (escaped) value.
    const fieldCard = (label, value, valueStyle = 'color: #111;') =>
        '<div style="background: white; padding: 10px; border-radius: 4px; margin-bottom: 8px;">' +
        `<strong style="color: #232f3e;">${label}:</strong><br><span style="${valueStyle}">${escapeHtml(value || 'Not found')}</span>` +
        '</div>';

    // Use the userscript clipboard API when it was granted, otherwise fall
    // back to the browser's async clipboard API.
    const copyText = async (text) => {
        if (typeof GM !== 'undefined' && typeof GM.setClipboard === 'function') {
            await GM.setClipboard(text);
            return;
        }
        await navigator.clipboard.writeText(text);
    };

    let html = '<div style="background: #f0f2f2; padding: 12px; border-radius: 4px;">';

    if (data.mainImage) {
        html += `
            <div style="margin-bottom: 12px; text-align: center;">
                <img src="${escapeHtml(data.mainImage)}" alt="Product" style="max-width: 100%; height: auto; border-radius: 4px; border: 1px solid #ddd;">
            </div>
        `;
    }

    html += fieldCard('Title', data.title);
    html += fieldCard('Price', data.price, 'color: #b12704; font-size: 18px; font-weight: bold;');
    html += fieldCard('Rating', data.rating);
    html += fieldCard('Reviews', data.reviewCount);
    html += fieldCard('Size', data.size);

    html += `
        <button id="copy-data-btn" style="width: 100%; padding: 10px; background: #232f3e; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 13px; margin-top: 8px;">
            Copy Data to Clipboard
        </button>
    `;

    html += '</div>';

    resultsDiv.innerHTML = html;

    // Look the button up inside the results container that was just rendered.
    const copyBtn = resultsDiv.querySelector('#copy-data-btn');
    if (copyBtn) {
        copyBtn.addEventListener('click', async () => {
            const textData = [
                'Product Information:',
                '-------------------',
                `Title: ${data.title || 'Not found'}`,
                `Price: ${data.price || 'Not found'}`,
                `Rating: ${data.rating || 'Not found'}`,
                `Reviews: ${data.reviewCount || 'Not found'}`,
                `Size: ${data.size || 'Not found'}`,
                `Image URL: ${data.mainImage || 'Not found'}`,
            ].join('\n').trim();

            try {
                await copyText(textData);
                copyBtn.textContent = 'Copied!';
                copyBtn.style.background = '#067d62';
                setTimeout(() => {
                    copyBtn.textContent = 'Copy Data to Clipboard';
                    copyBtn.style.background = '#232f3e';
                }, 2000);
                console.log('Data copied to clipboard');
            } catch (error) {
                console.error('Failed to copy to clipboard:', error);
                copyBtn.textContent = 'Copy Failed';
                setTimeout(() => {
                    copyBtn.textContent = 'Copy Data to Clipboard';
                }, 2000);
            }
        });
    }

    console.log('Scraped data displayed');
}
348
/**
 * Scrape the search results page and render one card per product into
 * `#scraper-results`, followed by "Copy All Data" and "Export as CSV" buttons.
 *
 * Shows a "No products found" notice when nothing was scraped. Logs an error
 * and returns if the results container is not in the document.
 *
 * @returns {void}
 */
function displaySearchResults() {
    const products = scrapeSearchResults();
    const resultsDiv = document.getElementById('scraper-results');

    if (!resultsDiv) {
        console.error('Results div not found');
        return;
    }

    if (products.length === 0) {
        resultsDiv.innerHTML = '<div style="padding: 10px; text-align: center; color: #666;">No products found on this page.</div>';
        return;
    }

    // Scraped values are page text; escape them before building markup so a
    // title containing "<" or quotes cannot inject HTML into the panel.
    const escapeHtml = (value) => String(value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');

    // Use the userscript clipboard API when it was granted, otherwise fall
    // back to the browser's async clipboard API.
    const copyText = async (text) => {
        if (typeof GM !== 'undefined' && typeof GM.setClipboard === 'function') {
            await GM.setClipboard(text);
            return;
        }
        await navigator.clipboard.writeText(text);
    };

    // Quote a CSV cell, doubling any embedded double quotes.
    const csvCell = (value) => `"${(value || '').replace(/"/g, '""')}"`;

    let html = `<div style="background: #f0f2f2; padding: 12px; border-radius: 4px;">`;
    html += `<div style="background: #232f3e; color: white; padding: 8px; border-radius: 4px; margin-bottom: 12px; text-align: center; font-weight: bold;">Found ${products.length} Products</div>`;

    products.forEach((product, index) => {
        html += `<div style="background: white; padding: 10px; border-radius: 4px; margin-bottom: 10px; border-left: 3px solid #ff9900;">`;

        if (product.mainImage) {
            html += `<div style="text-align: center; margin-bottom: 8px;">
                <img src="${escapeHtml(product.mainImage)}" alt="Product" style="max-width: 120px; height: auto; border-radius: 4px;">
            </div>`;
        }

        html += `<div style="font-weight: bold; color: #232f3e; margin-bottom: 5px; font-size: 12px;">Product ${index + 1}</div>`;

        if (product.title) {
            html += `<div style="margin-bottom: 5px;"><strong style="color: #232f3e;">Title:</strong><br><span style="color: #111; font-size: 12px;">${escapeHtml(product.title)}</span></div>`;
        }

        if (product.price) {
            html += `<div style="margin-bottom: 5px;"><strong style="color: #232f3e;">Price:</strong> <span style="color: #b12704; font-weight: bold;">${escapeHtml(product.price)}</span></div>`;
        }

        if (product.rating) {
            html += `<div style="margin-bottom: 5px;"><strong style="color: #232f3e;">Rating:</strong> <span style="color: #111;">${escapeHtml(product.rating)}</span>`;
            if (product.reviewCount) {
                html += ` <span style="color: #666;">${escapeHtml(product.reviewCount)}</span>`;
            }
            html += `</div>`;
        }

        if (product.asin) {
            html += `<div style="margin-bottom: 5px;"><strong style="color: #232f3e;">ASIN:</strong> <span style="color: #111; font-size: 11px;">${escapeHtml(product.asin)}</span></div>`;
        }

        if (product.productUrl) {
            // rel="noopener noreferrer" stops the opened tab from reaching back via window.opener.
            html += `<div style="margin-top: 5px;"><a href="${escapeHtml(product.productUrl)}" target="_blank" rel="noopener noreferrer" style="color: #007185; font-size: 11px; text-decoration: none;">View Product →</a></div>`;
        }

        html += `</div>`;
    });

    html += `
        <div style="display: flex; gap: 8px; margin-top: 12px;">
            <button id="copy-all-data-btn" style="flex: 1; padding: 10px; background: #232f3e; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 12px;">
                Copy All Data
            </button>
            <button id="export-csv-btn" style="flex: 1; padding: 10px; background: #067d62; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 12px;">
                Export as CSV
            </button>
        </div>
    `;

    html += '</div>';

    resultsDiv.innerHTML = html;

    // Look the buttons up inside the results container that was just rendered.
    const copyAllBtn = resultsDiv.querySelector('#copy-all-data-btn');
    if (copyAllBtn) {
        copyAllBtn.addEventListener('click', async () => {
            let textData = `Amazon Search Results - ${products.length} Products\n`;
            textData += '='.repeat(60) + '\n\n';

            products.forEach((product, index) => {
                textData += `Product ${index + 1}:\n`;
                textData += `Title: ${product.title || 'N/A'}\n`;
                textData += `Price: ${product.price || 'N/A'}\n`;
                textData += `Rating: ${product.rating || 'N/A'}\n`;
                textData += `Reviews: ${product.reviewCount || 'N/A'}\n`;
                textData += `ASIN: ${product.asin || 'N/A'}\n`;
                textData += `URL: ${product.productUrl || 'N/A'}\n`;
                textData += `Image: ${product.mainImage || 'N/A'}\n`;
                textData += '\n' + '-'.repeat(60) + '\n\n';
            });

            try {
                await copyText(textData);
                copyAllBtn.textContent = 'Copied!';
                copyAllBtn.style.background = '#067d62';
                setTimeout(() => {
                    copyAllBtn.textContent = 'Copy All Data';
                    copyAllBtn.style.background = '#232f3e';
                }, 2000);
                console.log('All data copied to clipboard');
            } catch (error) {
                console.error('Failed to copy to clipboard:', error);
                copyAllBtn.textContent = 'Copy Failed';
                setTimeout(() => {
                    copyAllBtn.textContent = 'Copy All Data';
                }, 2000);
            }
        });
    }

    const exportCsvBtn = resultsDiv.querySelector('#export-csv-btn');
    if (exportCsvBtn) {
        exportCsvBtn.addEventListener('click', () => {
            let csv = 'Title,Price,Rating,Review Count,ASIN,Product URL,Image URL\n';

            products.forEach((product) => {
                const row = [
                    product.title,
                    product.price,
                    product.rating,
                    product.reviewCount,
                    product.asin,
                    product.productUrl,
                    product.mainImage,
                ].map(csvCell);
                csv += row.join(',') + '\n';
            });

            // Trigger a download through a temporary object URL and anchor.
            const blob = new Blob([csv], { type: 'text/csv' });
            const url = URL.createObjectURL(blob);
            const a = document.createElement('a');
            a.href = url;
            a.download = `amazon-products-${Date.now()}.csv`;
            document.body.appendChild(a);
            a.click();
            document.body.removeChild(a);
            // Revoke on a later tick so the URL is still valid while the
            // browser starts the download.
            setTimeout(() => URL.revokeObjectURL(url), 0);

            exportCsvBtn.textContent = 'Exported!';
            setTimeout(() => {
                exportCsvBtn.textContent = 'Export as CSV';
            }, 2000);

            console.log('CSV exported successfully');
        });
    }

    console.log('Search results displayed');
}
500
/**
 * Tell whether the current URL path looks like a product detail page.
 *
 * @returns {boolean} True when the path contains `/dp/` or `/gp/product/`.
 */
function isProductPage() {
    const { pathname } = window.location;
    const productPathMarkers = ['/dp/', '/gp/product/'];
    return productPathMarkers.some((marker) => pathname.includes(marker));
}
506
/**
 * Tell whether the current page looks like a search results page.
 *
 * A page counts as search results when any of these holds:
 *  - one segment of the URL path is exactly `s` (e.g. `/s` or `/s/ref=...`);
 *  - the query string has a `k` parameter;
 *  - the document contains at least one search-result card.
 *
 * The path and query are matched as whole segments and parameter names,
 * not as substrings, so a path like `/sony-x/dp/...` or a parameter like
 * `sk=` does not count as search.
 *
 * @returns {boolean} True when the page is treated as a search results page.
 */
function isSearchResultsPage() {
    const { pathname, search } = window.location;
    const hasSearchPathSegment = pathname.split('/').includes('s');
    const hasKeywordParam = new URLSearchParams(search).has('k');
    return hasSearchPathSegment ||
           hasKeywordParam ||
           document.querySelector('[data-component-type="s-search-result"]') !== null;
}
513
/**
 * Entry point: on product or search pages, show the scraper panel and keep
 * it present while the page mutates.
 *
 * On any other page this only logs a message. On a supported page it waits
 * for the DOM to be ready, schedules the panel one second later, and watches
 * `document.body` so the panel is re-created (debounced to once per second)
 * if it is missing after a DOM change.
 *
 * @returns {void}
 */
function init() {
    console.log('Amazon Product Scraper initialized');

    const isSupportedPage = () => isProductPage() || isSearchResultsPage();

    if (!isSupportedPage()) {
        console.log('Not a product or search page, scraper UI will not be shown');
        return;
    }

    console.log('Amazon page detected (product or search)');

    const start = () => {
        // Give the page a moment to finish rendering before adding the panel.
        setTimeout(createScraperUI, 1000);

        // Watch for dynamic content changes and restore the panel if it is gone.
        const observer = new MutationObserver(debounce(() => {
            if (!document.getElementById('amazon-scraper-ui') && isSupportedPage()) {
                createScraperUI();
            }
        }, 1000));

        observer.observe(document.body, {
            childList: true,
            subtree: true
        });
    };

    // document.body can still be null while the document is loading, so the
    // observer is attached only once the DOM is ready.
    if (document.readyState === 'loading') {
        document.addEventListener('DOMContentLoaded', start, { once: true });
    } else {
        start();
    }
}
545
546    // Start the extension
547    init();
548})();
Amazon Product Info Scraper | Robomonkey