Home / Programming / web scraping and crawling / puppeteer, headless chrome, cdp, chromedp edit
Try Documentalist, my app that offers fast, offline access to 190+ programmer API docs.

Note that both of those fail more often than they work.
// Opens a visible browser window, searches Google for "blin", then visits the
// first ten results one by one, saving a screenshot of each before going back.
// NOTE(review): `new Browser(...)`, `page.navigate` and `page.press` look like
// an early Puppeteer API — confirm against the installed version.
const {Browser} = require('puppeteer');
const browser = new Browser({headless: false});

browser.newPage().then(async page => {
  try {
    page.on('load', () => console.log(`LOADED: ${page.url()}`));
    await page.navigate('https://google.com');
    await page.waitFor('input[name=q]');
    await page.focus('input[name=q]');
    await page.type('blin');
    await page.press('Enter');
    for (let i = 0; i < 10; ++i) {
      const searchResult = `div.g:nth-child(${i + 1}) h3 a`;
      await page.waitFor(searchResult, {visible: true});
      // Await the click together with the navigation it triggers, so a failed
      // click surfaces here instead of becoming an unhandled rejection.
      await Promise.all([
        page.waitForNavigation(),
        page.click(searchResult),
      ]);
      await page.screenshot({path: `screenshot-${i + 1}.png`});
      await page.goBack();
    }
  } finally {
    // Close the browser even when a step above throws.
    browser.close();
  }
});
// Searches Google for "epub reader windows" and prints the result links,
// one per line.
// NOTE(review): `puppeteer` is not defined in this snippet — presumably it is
// required earlier in the script; confirm.
const browser = await puppeteer.launch();
try {
  const page = await browser.newPage();
  await page.goto('https://google.com', {waitUntil: 'networkidle2'});

  // Same selector wait as used for the results further down.
  await page.waitForSelector('input[name=q]');
  // Type our query into the search bar
  await page.type('input[name=q]', 'epub reader windows');

  await page.click('input[type="submit"]');

  // Wait for the results to show up
  await page.waitForSelector('h3 a');

  // Extract the results from the page
  // NOTE(review): the wait above uses 'h3 a' but the extraction uses 'h3.r a';
  // if the result headings lack class "r" this yields an empty list — verify.
  const links = await page.evaluate(() => {
    const anchors = Array.from(document.querySelectorAll('h3.r a'));
    return anchors.map(anchor => anchor.href);
  });
  console.log(links.join('\n'));
} finally {
  // Close the browser even when a step above throws.
  await browser.close();
}
// Browser and page shared by the rest of this snippet: extractLinks()
// navigates this `page`, and the browser is closed at the end of the script.
// NOTE(review): `puppeteer` is not defined in this snippet — presumably it is
// required earlier in the script; confirm.
const browser = await puppeteer.launch();
const page = await browser.newPage();

/**
 * Builds a Google search URL for a search term and a zero-based results page.
 *
 * Example: buildQuery('epub reader', 1) returns
 * https://google.com/search?q=epub+reader&start=10
 *
 * @param {string} term - Search term; it is URL-encoded and every space is
 *   sent as "+".
 * @param {number} [page=0] - Zero-based results page. For page > 0 a
 *   `start` parameter of `page * 10` is appended. (This parameter shadows any
 *   outer `page` variable inside the function.)
 * @returns {string} The search URL.
 */
function buildQuery(term, page=0) {
  // A string pattern would replace only the first "%20"; the global regex
  // converts every encoded space.
  const q = encodeURIComponent(term).replace(/%20/g, '+');
  let url = `https://google.com/search?q=${q}`;
  if (page > 0) {
    url += `&start=${page * 10}`;
  }
  return url;
}

/**
 * Logs `url`, navigates the outer `page` to it, waits for an 'h3 a' element
 * and resolves with the `href` of every element matching 'h3.r a'.
 *
 * @param {string} url - Address to load.
 * @returns {Promise<string[]>} The collected hrefs.
 */
async function extractLinks(url) {
    console.log('url: ' + url);
    await page.goto(url, {waitUntil: 'networkidle2'});
    // Hold until a result heading link has appeared
    await page.waitForSelector('h3 a');
    // The callback reads `document`, so it is handed to page.evaluate
    // rather than run here
    return page.evaluate(() =>
        Array.from(document.querySelectorAll('h3.r a'), (anchor) => anchor.href)
    );
}

/**
 * Fetches result pages for `term` one after another and prints each page's
 * links, one per line.
 *
 * @param {string} term - Search term passed to buildQuery.
 * @param {number} [maxPages=1] - Number of result pages to fetch. The
 *   function name suggests 10 pages was the eventual intent; the default
 *   keeps the single page the code actually fetched.
 * @returns {Promise<void>}
 */
async function extractFirst100(term, maxPages = 1) {
    for (let pageNo = 0; pageNo < maxPages; pageNo++) {
        const url = buildQuery(term, pageNo);
        // Keep `links` local: assigning to an undeclared name throws a
        // ReferenceError in module/strict code and leaks a global otherwise.
        const links = await extractLinks(url);
        console.log(links.join('\n'));
    }
}

/**
 * Variant of extractFirst100 that starts all page fetches up front, waits
 * for all of them, then prints each page's links in page order.
 *
 * @param {string} term - Search term passed to buildQuery.
 * @param {number} [maxPages=3] - Number of result pages to fetch.
 * @returns {Promise<void>} Rejects if any single fetch rejects (Promise.all).
 */
async function extractFirst100v2(term, maxPages = 3) {
    // Kick off every fetch before awaiting any of them.
    // NOTE(review): extractLinks drives the one shared `page`, so these
    // overlapping navigations may interfere with each other — confirm, or
    // give each fetch its own page.
    const pending = Array.from({length: maxPages}, (_, pageNo) =>
        extractLinks(buildQuery(term, pageNo))
    );
    const res = await Promise.all(pending);
    for (const links of res) {
        console.log(links.join('\n'));
    }
}

// Run the search, and close the browser even if the extraction throws so the
// browser process is not left running.
try {
  await extractFirst100("epub reader");
} finally {
  await browser.close();
}
https://github.com/GoogleChromeLabs/puppeteer-examples
For Go:

Feedback about page:

Feedback:
Optional: your email if you want me to get back to you:

Share on        

Need fast, offline access to 190+ programmer API docs? Try my app Documentalist for Windows