LoopNet Listings Extractor to Excel

A new extension

Size

18.3 KB

Version

1.0.1

Created

Dec 22, 2025

Updated

about 1 month ago

1// ==UserScript==
2// @name		LoopNet Listings Extractor to Excel
3// @description		A new extension
4// @version		1.0.1
5// @match		https://*.loopnet.com/*
6// @icon		https://www.loopnet.com/favicon.ico?v=a9e78ae7bf796dfbce4aef53e0a498ce
7// @require		https://cdnjs.cloudflare.com/ajax/libs/xlsx/0.18.5/xlsx.full.min.js
8// ==/UserScript==
9(function() {
10    'use strict';
11
12    console.log('LoopNet Listings Extractor initialized');
13
14    // Utility function to wait for element
15    function waitForElement(selector, timeout = 10000) {
16        return new Promise((resolve, reject) => {
17            if (document.querySelector(selector)) {
18                return resolve(document.querySelector(selector));
19            }
20
21            const observer = new MutationObserver(() => {
22                if (document.querySelector(selector)) {
23                    observer.disconnect();
24                    resolve(document.querySelector(selector));
25                }
26            });
27
28            observer.observe(document.body, {
29                childList: true,
30                subtree: true
31            });
32
33            setTimeout(() => {
34                observer.disconnect();
35                reject(new Error(`Timeout waiting for ${selector}`));
36            }, timeout);
37        });
38    }
39
40    // Utility function to delay
41    function delay(ms) {
42        return new Promise(resolve => setTimeout(resolve, ms));
43    }
44
45    // Extract listing data from search results page
46    function extractListingsFromPage() {
47        const listings = [];
48        const articles = document.querySelectorAll('article.placard[data-id]');
49        
50        console.log(`Found ${articles.length} listings on current page`);
51        
52        articles.forEach(article => {
53            try {
54                const linkElement = article.querySelector('header h4 a[href*="/Listing/"]');
55                const titleElement = article.querySelector('header h4 a');
56                const subtitleElement = article.querySelector('header h6 a.subtitle-alpha');
57                const addressElement = article.querySelector('header a.subtitle-beta');
58                const priceElement = article.querySelector('li[name="Price"]');
59                const dataPoints = article.querySelectorAll('ul.data-points-2c li');
60                
61                let unitCount = '';
62                dataPoints.forEach(li => {
63                    const text = li.textContent.trim();
64                    if (text.includes('Room Hotel') || text.includes('Unit')) {
65                        unitCount = text;
66                    }
67                });
68                
69                const listing = {
70                    url: linkElement ? linkElement.href : '',
71                    title: titleElement ? titleElement.textContent.trim() : '',
72                    subtitle: subtitleElement ? subtitleElement.textContent.trim() : '',
73                    address: addressElement ? addressElement.textContent.trim().replace(/\s+/g, ' ') : '',
74                    price: priceElement ? priceElement.textContent.trim().replace(/\s+/g, '') : '',
75                    unitCount: unitCount
76                };
77                
78                if (listing.url) {
79                    listings.push(listing);
80                }
81            } catch (error) {
82                console.error('Error extracting listing:', error);
83            }
84        });
85        
86        return listings;
87    }
88
89    // Extract detailed information from individual listing page
90    async function extractListingDetails(url) {
91        console.log(`Fetching details from: ${url}`);
92        
93        try {
94            const response = await GM.xmlhttpRequest({
95                method: 'GET',
96                url: url,
97                timeout: 30000
98            });
99            
100            const parser = new DOMParser();
101            const doc = parser.parseFromString(response.responseText, 'text/html');
102            
103            // Extract Investment Highlights
104            let investmentHighlights = '';
105            const highlightsSection = doc.querySelector('section[data-section-name="Investment Highlights"]');
106            if (highlightsSection) {
107                const highlightsList = highlightsSection.querySelectorAll('ul li');
108                const highlights = Array.from(highlightsList).map(li => li.textContent.trim());
109                investmentHighlights = highlights.join('\n');
110            }
111            
112            // Extract Executive Summary
113            let executiveSummary = '';
114            const summarySection = doc.querySelector('section[data-section-name="Executive Summary"]');
115            if (summarySection) {
116                const summaryText = summarySection.querySelector('.property-data');
117                if (summaryText) {
118                    executiveSummary = summaryText.textContent.trim();
119                }
120            }
121            
122            // Extract Property Facts
123            let propertyFacts = '';
124            const factsSection = doc.querySelector('section[data-section-name="Property Facts"]');
125            if (factsSection) {
126                const factRows = factsSection.querySelectorAll('tr');
127                const facts = [];
128                factRows.forEach(row => {
129                    const label = row.querySelector('td:first-child');
130                    const value = row.querySelector('td:last-child');
131                    if (label && value) {
132                        facts.push(`${label.textContent.trim()}: ${value.textContent.trim()}`);
133                    }
134                });
135                propertyFacts = facts.join('\n');
136            }
137            
138            return {
139                investmentHighlights,
140                executiveSummary,
141                propertyFacts
142            };
143        } catch (error) {
144            console.error(`Error fetching details from ${url}:`, error);
145            return {
146                investmentHighlights: 'Error fetching',
147                executiveSummary: 'Error fetching',
148                propertyFacts: 'Error fetching'
149            };
150        }
151    }
152
153    // Navigate through all pages and collect listings
154    async function collectAllListings(statusCallback) {
155        const allListings = [];
156        let currentPage = 1;
157        const totalPages = 5; // Based on pagination showing 119 results / 25 per page
158        
159        statusCallback(`Extracting listings from page ${currentPage}/${totalPages}...`);
160        
161        // Extract from current page
162        const currentPageListings = extractListingsFromPage();
163        allListings.push(...currentPageListings);
164        
165        // Navigate to other pages
166        for (let page = 2; page <= totalPages; page++) {
167            statusCallback(`Navigating to page ${page}/${totalPages}...`);
168            
169            const nextPageLink = document.querySelector(`a[data-pg="${page}"]`);
170            if (nextPageLink) {
171                const pageUrl = nextPageLink.href;
172                
173                try {
174                    const response = await GM.xmlhttpRequest({
175                        method: 'GET',
176                        url: pageUrl,
177                        timeout: 30000
178                    });
179                    
180                    const parser = new DOMParser();
181                    const doc = parser.parseFromString(response.responseText, 'text/html');
182                    
183                    // Extract listings from this page
184                    const articles = doc.querySelectorAll('article.placard[data-id]');
185                    console.log(`Found ${articles.length} listings on page ${page}`);
186                    
187                    articles.forEach(article => {
188                        try {
189                            const linkElement = article.querySelector('header h4 a[href*="/Listing/"]');
190                            const titleElement = article.querySelector('header h4 a');
191                            const subtitleElement = article.querySelector('header h6 a.subtitle-alpha');
192                            const addressElement = article.querySelector('header a.subtitle-beta');
193                            const priceElement = article.querySelector('li[name="Price"]');
194                            const dataPoints = article.querySelectorAll('ul.data-points-2c li');
195                            
196                            let unitCount = '';
197                            dataPoints.forEach(li => {
198                                const text = li.textContent.trim();
199                                if (text.includes('Room Hotel') || text.includes('Unit')) {
200                                    unitCount = text;
201                                }
202                            });
203                            
204                            const listing = {
205                                url: linkElement ? linkElement.href : '',
206                                title: titleElement ? titleElement.textContent.trim() : '',
207                                subtitle: subtitleElement ? subtitleElement.textContent.trim() : '',
208                                address: addressElement ? addressElement.textContent.trim().replace(/\s+/g, ' ') : '',
209                                price: priceElement ? priceElement.textContent.trim().replace(/\s+/g, '') : '',
210                                unitCount: unitCount
211                            };
212                            
213                            if (listing.url) {
214                                allListings.push(listing);
215                            }
216                        } catch (error) {
217                            console.error('Error extracting listing:', error);
218                        }
219                    });
220                    
221                    await delay(1000); // Be respectful to the server
222                } catch (error) {
223                    console.error(`Error fetching page ${page}:`, error);
224                }
225            }
226        }
227        
228        return allListings;
229    }
230
231    // Fetch details for all listings
232    async function fetchAllDetails(listings, statusCallback) {
233        const detailedListings = [];
234        
235        for (let i = 0; i < listings.length; i++) {
236            const listing = listings[i];
237            statusCallback(`Fetching details for listing ${i + 1}/${listings.length}: ${listing.title}`);
238            
239            const details = await extractListingDetails(listing.url);
240            
241            detailedListings.push({
242                'Direct Listing Link': listing.url,
243                'Title': listing.title,
244                'Subtitle': listing.subtitle,
245                'Unit Count': listing.unitCount,
246                'Address': listing.address,
247                'Asking Price': listing.price,
248                'INVESTMENT HIGHLIGHTS': details.investmentHighlights,
249                'EXECUTIVE SUMMARY': details.executiveSummary,
250                'PROPERTY FACTS': details.propertyFacts
251            });
252            
253            // Add delay to avoid overwhelming the server
254            await delay(2000);
255        }
256        
257        return detailedListings;
258    }
259
260    // Generate Excel file
261    function generateExcel(data) {
262        console.log('Generating Excel file with', data.length, 'listings');
263        
264        const worksheet = XLSX.utils.json_to_sheet(data);
265        
266        // Set column widths
267        worksheet['!cols'] = [
268            { wch: 60 }, // Direct Listing Link
269            { wch: 40 }, // Title
270            { wch: 30 }, // Subtitle
271            { wch: 20 }, // Unit Count
272            { wch: 40 }, // Address
273            { wch: 15 }, // Asking Price
274            { wch: 50 }, // INVESTMENT HIGHLIGHTS
275            { wch: 50 }, // EXECUTIVE SUMMARY
276            { wch: 50 }  // PROPERTY FACTS
277        ];
278        
279        const workbook = XLSX.utils.book_new();
280        XLSX.utils.book_append_sheet(workbook, worksheet, 'LoopNet Listings');
281        
282        const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, -5);
283        const filename = `LoopNet_Listings_${timestamp}.xlsx`;
284        
285        XLSX.writeFile(workbook, filename);
286        console.log('Excel file generated:', filename);
287    }
288
289    // Create UI button
290    function createExtractButton() {
291        const button = document.createElement('button');
292        button.id = 'loopnet-extract-btn';
293        button.textContent = 'Extract All Listings to Excel';
294        button.style.cssText = `
295            position: fixed;
296            top: 100px;
297            right: 20px;
298            z-index: 10000;
299            padding: 12px 20px;
300            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
301            color: white;
302            border: none;
303            border-radius: 8px;
304            font-size: 14px;
305            font-weight: 600;
306            cursor: pointer;
307            box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
308            transition: all 0.3s ease;
309        `;
310        
311        button.addEventListener('mouseenter', () => {
312            button.style.transform = 'translateY(-2px)';
313            button.style.boxShadow = '0 6px 20px rgba(0, 0, 0, 0.3)';
314        });
315        
316        button.addEventListener('mouseleave', () => {
317            button.style.transform = 'translateY(0)';
318            button.style.boxShadow = '0 4px 15px rgba(0, 0, 0, 0.2)';
319        });
320        
321        button.addEventListener('click', startExtraction);
322        
323        document.body.appendChild(button);
324        console.log('Extract button added to page');
325    }
326
327    // Create status panel
328    function createStatusPanel() {
329        const panel = document.createElement('div');
330        panel.id = 'loopnet-status-panel';
331        panel.style.cssText = `
332            position: fixed;
333            top: 150px;
334            right: 20px;
335            z-index: 10000;
336            width: 350px;
337            padding: 20px;
338            background: white;
339            border-radius: 8px;
340            box-shadow: 0 4px 20px rgba(0, 0, 0, 0.15);
341            display: none;
342            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
343        `;
344        
345        panel.innerHTML = `
346            <div style="margin-bottom: 15px;">
347                <h3 style="margin: 0 0 10px 0; font-size: 16px; color: #333;">Extraction Progress</h3>
348                <div id="loopnet-status-text" style="font-size: 13px; color: #666; margin-bottom: 10px;">Initializing...</div>
349                <div style="width: 100%; height: 6px; background: #e0e0e0; border-radius: 3px; overflow: hidden;">
350                    <div id="loopnet-progress-bar" style="width: 0%; height: 100%; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); transition: width 0.3s ease;"></div>
351                </div>
352            </div>
353            <button id="loopnet-cancel-btn" style="width: 100%; padding: 8px; background: #f44336; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 13px; font-weight: 600;">Cancel</button>
354        `;
355        
356        document.body.appendChild(panel);
357        return panel;
358    }
359
360    // Main extraction function
361    let extractionCancelled = false;
362    
363    async function startExtraction() {
364        extractionCancelled = false;
365        
366        const button = document.getElementById('loopnet-extract-btn');
367        const panel = document.getElementById('loopnet-status-panel') || createStatusPanel();
368        const statusText = document.getElementById('loopnet-status-text');
369        const progressBar = document.getElementById('loopnet-progress-bar');
370        const cancelBtn = document.getElementById('loopnet-cancel-btn');
371        
372        button.disabled = true;
373        button.style.opacity = '0.5';
374        button.style.cursor = 'not-allowed';
375        panel.style.display = 'block';
376        
377        cancelBtn.onclick = () => {
378            extractionCancelled = true;
379            statusText.textContent = 'Cancelling...';
380        };
381        
382        const updateStatus = (message, progress = 0) => {
383            if (statusText) statusText.textContent = message;
384            if (progressBar) progressBar.style.width = `${progress}%`;
385        };
386        
387        try {
388            // Step 1: Collect all listings
389            updateStatus('Collecting listings from all pages...', 10);
390            const allListings = await collectAllListings(updateStatus);
391            
392            if (extractionCancelled) {
393                updateStatus('Extraction cancelled', 0);
394                return;
395            }
396            
397            console.log(`Total listings collected: ${allListings.length}`);
398            updateStatus(`Found ${allListings.length} listings. Fetching details...`, 30);
399            
400            // Step 2: Fetch details for each listing
401            const detailedListings = await fetchAllDetails(allListings, (msg) => {
402                if (extractionCancelled) return;
403                const match = msg.match(/(\d+)\/(\d+)/);
404                if (match) {
405                    const current = parseInt(match[1]);
406                    const total = parseInt(match[2]);
407                    const progress = 30 + (current / total) * 60;
408                    updateStatus(msg, progress);
409                }
410            });
411            
412            if (extractionCancelled) {
413                updateStatus('Extraction cancelled', 0);
414                return;
415            }
416            
417            // Step 3: Generate Excel
418            updateStatus('Generating Excel file...', 95);
419            generateExcel(detailedListings);
420            
421            updateStatus(`✓ Complete! Extracted ${detailedListings.length} listings to Excel`, 100);
422            
423            setTimeout(() => {
424                panel.style.display = 'none';
425                button.disabled = false;
426                button.style.opacity = '1';
427                button.style.cursor = 'pointer';
428            }, 3000);
429            
430        } catch (error) {
431            console.error('Extraction error:', error);
432            updateStatus(`Error: ${error.message}`, 0);
433            button.disabled = false;
434            button.style.opacity = '1';
435            button.style.cursor = 'pointer';
436        }
437    }
438
439    // Initialize
440    function init() {
441        // Only run on search results pages
442        if (window.location.href.includes('/search/')) {
443            console.log('On search results page, initializing...');
444            
445            // Wait for page to load
446            if (document.readyState === 'loading') {
447                document.addEventListener('DOMContentLoaded', () => {
448                    setTimeout(createExtractButton, 1000);
449                });
450            } else {
451                setTimeout(createExtractButton, 1000);
452            }
453        }
454    }
455
456    init();
457})();
LoopNet Listings Extractor to Excel | Robomonkey