Puppeteer script to extract URLs for a given search query
// Opens the Google home page, submits a fixed query through the search box and
// prints the href of every matched result link, one per line.
// Assumes `puppeteer` is already in scope and that top-level `await` is
// available here (ES module or an enclosing async function).
const browser = await puppeteer.launch();
try {
  const page = await browser.newPage();
  await page.goto('https://google.com', { waitUntil: 'networkidle2' });

  // `page.waitFor` is deprecated in Puppeteer; `waitForSelector` is the
  // dedicated call for waiting on a CSS selector.
  const searchBoxSelector = 'input[name=q]';
  await page.waitForSelector(searchBoxSelector);

  // Type our query into the search bar
  await page.type(searchBoxSelector, 'epub reader windows');
  await page.click('input[type="submit"]');

  // Wait for the results to show up. The same selector is used for waiting and
  // for extraction, so the wait cannot succeed on elements the extraction
  // below would then fail to match.
  const resultLinkSelector = 'h3.r a';
  await page.waitForSelector(resultLinkSelector);

  // Extract the results from the page. The callback runs in the browser
  // context, so the selector has to be passed in as an argument.
  const links = await page.evaluate((selector) => {
    const anchors = Array.from(document.querySelectorAll(selector));
    return anchors.map((anchor) => anchor.href);
  }, resultLinkSelector);

  console.log(links.join('\n'));
} finally {
  // Always shut the browser down, even when a step above throws.
  await browser.close();
}
Puppeteer script to extract the first 100 search results for a query
// Assumes `puppeteer` is already in scope and that top-level `await` is
// available here (ES module or an enclosing async function).
const browser = await puppeteer.launch();
const page = await browser.newPage();

/**
 * Builds a Google search URL for a term and a zero-based result page.
 * Example: buildQuery('epub reader', 1)
 *   -> https://google.com/search?q=epub+reader&start=10
 *
 * @param {string} term - Search query.
 * @param {number} [page=0] - Zero-based result page; each page advances
 *   `start` by 10. (Shadows the module-level Puppeteer `page` inside this
 *   function only.)
 * @returns {string} The search URL.
 */
function buildQuery(term, page = 0) {
  // A string pattern passed to replace() only substitutes the first match,
  // so a global regex is needed to convert every encoded space.
  const q = encodeURIComponent(term).replace(/%20/g, '+');
  let url = `https://google.com/search?q=${q}`;
  if (page > 0) {
    url += `&start=${page * 10}`;
  }
  return url;
}

/**
 * Navigates to `url` and returns the href of every matched result link.
 *
 * @param {string} url - Search results URL to load.
 * @param {object} [targetPage=page] - Puppeteer page to navigate; defaults to
 *   the shared module-level page.
 * @returns {Promise<string[]>} The hrefs found on the page.
 */
async function extractLinks(url, targetPage = page) {
  // One selector for both the wait and the extraction, so the wait cannot
  // succeed on elements the extraction would then fail to match.
  const resultLinkSelector = 'h3.r a';

  console.log(`url: ${url}`);
  await targetPage.goto(url, { waitUntil: 'networkidle2' });

  // Wait for the results to show up
  await targetPage.waitForSelector(resultLinkSelector);

  // Extract the results from the page. The callback runs in the browser
  // context, so the selector has to be passed in as an argument.
  const links = await targetPage.evaluate((selector) => {
    const anchors = Array.from(document.querySelectorAll(selector));
    return anchors.map((anchor) => anchor.href);
  }, resultLinkSelector);
  return links;
}

/**
 * Fetches result pages one after another on the shared page and prints the
 * links of each page, one per line.
 *
 * @param {string} term - Search query.
 * @param {number} [maxPages=1] - Number of result pages to fetch. The default
 *   keeps the original single-page behaviour; 10 pages would be needed for
 *   100 results at 10 per page.
 * @returns {Promise<void>}
 */
async function extractFirst100(term, maxPages = 1) {
  for (let pageNo = 0; pageNo < maxPages; pageNo++) {
    const url = buildQuery(term, pageNo);
    const links = await extractLinks(url);
    console.log(links.join('\n'));
  }
}

/**
 * Fetches result pages concurrently and prints the links of each page in
 * page order, one per line.
 *
 * @param {string} term - Search query.
 * @param {number} [maxPages=3] - Number of result pages to fetch.
 * @returns {Promise<void>}
 */
async function extractFirst100v2(term, maxPages = 3) {
  // Each request gets its own page: a single page can only perform one
  // navigation at a time, so concurrent goto() calls on the shared page would
  // interrupt each other.
  const pending = Array.from({ length: maxPages }, async (_, pageNo) => {
    const url = buildQuery(term, pageNo);
    const tab = await browser.newPage();
    try {
      return await extractLinks(url, tab);
    } finally {
      await tab.close();
    }
  });

  const res = await Promise.all(pending);
  for (const links of res) {
    console.log(links.join('\n'));
  }
}

try {
  await extractFirst100("epub reader");
} finally {
  // Always shut the browser down, even when extraction throws.
  await browser.close();
}