What Is Puppeteer?
From the Puppeteer repo README:
Puppeteer is a Node library which provides a high-level API to control headless Chrome over the DevTools Protocol. It can also be configured to use full (non-headless) Chrome.
What Can I Do With Puppeteer?
Getting Started
Required
- Node.js v7.6 or greater
Note: Puppeteer requires at least Node v6.4.0, but the examples below use async/await, which is only supported in Node v7.6.0 or greater.
Installation
yarn add puppeteer
# or "npm i puppeteer"
Recipes
Below are a few simple examples to get you started:
Screenshot: Viewport
'use strict';
const puppeteer = require('puppeteer');
// Launches a browser, loads github.com in a 1280x800 viewport and saves a
// screenshot of the visible viewport to 'screenshot-viewport.png'.
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    // Large viewport. Awaited so it completes before navigation starts and a
    // failure surfaces here instead of as a floating promise.
    await page.setViewport({ width: 1280, height: 800 });
    // NOTE(review): newer Puppeteer releases use 'networkidle0'/'networkidle2'
    // instead of 'networkidle' — confirm against the installed version.
    await page.goto('https://github.com', { waitUntil: 'networkidle' });

    // Screenshot
    await page.screenshot({ path: 'screenshot-viewport.png' });
  } finally {
    // Always shut the browser down, even when a step above throws.
    await browser.close();
  }
})().catch(err => {
  // Report the failure explicitly rather than leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
Screenshot: Full Page
'use strict';
const puppeteer = require('puppeteer');
// Launches a browser, loads github.com in a 1280x800 viewport and saves a
// full-page screenshot to 'screenshot-full.png'.
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    // Large viewport. Awaited so it completes before navigation starts and a
    // failure surfaces here instead of as a floating promise.
    await page.setViewport({ width: 1280, height: 800 });
    // NOTE(review): newer Puppeteer releases use 'networkidle0'/'networkidle2'
    // instead of 'networkidle' — confirm against the installed version.
    await page.goto('https://github.com', { waitUntil: 'networkidle' });

    // Screenshot of the whole page (fullPage: true), not just the viewport.
    await page.screenshot({ path: 'screenshot-full.png', fullPage: true });
  } finally {
    // Always shut the browser down, even when a step above throws.
    await browser.close();
  }
})().catch(err => {
  // Report the failure explicitly rather than leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
Screenshot: Page Coordinates
'use strict';
const puppeteer = require('puppeteer');
// Launches a browser, loads github.com in a 1280x800 viewport and saves a
// screenshot of a fixed 34x34 rectangle to 'screenshot-coords.png'.
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    // Large viewport. Awaited so it completes before navigation starts and a
    // failure surfaces here instead of as a floating promise.
    await page.setViewport({ width: 1280, height: 800 });
    // NOTE(review): newer Puppeteer releases use 'networkidle0'/'networkidle2'
    // instead of 'networkidle' — confirm against the installed version.
    await page.goto('https://github.com', { waitUntil: 'networkidle' });

    // Screenshot restricted to the given clip rectangle.
    await page.screenshot({
      path: 'screenshot-coords.png',
      clip: { x: 147, y: 19, width: 34, height: 34 },
    });
  } finally {
    // Always shut the browser down, even when a step above throws.
    await browser.close();
  }
})().catch(err => {
  // Report the failure explicitly rather than leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
Screenshot: DOM Element
'use strict';
const puppeteer = require('puppeteer');
// Launches a browser, loads github.com in a 1280x800 viewport and saves a
// screenshot of a single DOM element to 'screenshot-element.png'.
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    // Large viewport. Awaited so it completes before navigation starts and a
    // failure surfaces here instead of as a floating promise.
    await page.setViewport({ width: 1280, height: 800 });
    // NOTE(review): newer Puppeteer releases use 'networkidle0'/'networkidle2'
    // instead of 'networkidle' — confirm against the installed version.
    await page.goto('https://github.com', { waitUntil: 'networkidle' });

    /**
     * Screenshots the first element matching `selector`, expanded by
     * `padding` pixels on every side, into 'screenshot-element.png'.
     *
     * @param {string} selector - CSS selector passed to document.querySelector.
     * @param {number} [padding=0] - Extra pixels added around the element.
     * @returns {Promise<*>} Whatever page.screenshot() resolves with.
     * @throws {Error} If no element matches `selector`.
     */
    async function screenshotDOMElement(selector, padding = 0) {
      // Runs in the page context; only the plain object returned here is
      // handed back to this script.
      const rect = await page.evaluate(selector => {
        const element = document.querySelector(selector);
        if (!element) {
          return null;
        }
        const { x, y, width, height } = element.getBoundingClientRect();
        return { left: x, top: y, width, height, id: element.id };
      }, selector);

      // Fail with a clear message instead of a TypeError from inside the page.
      if (rect === null) {
        throw new Error(`No element matches selector: ${selector}`);
      }
      console.log('rect: ', rect);

      return page.screenshot({
        path: 'screenshot-element.png',
        clip: {
          x: rect.left - padding,
          y: rect.top - padding,
          width: rect.width + padding * 2,
          height: rect.height + padding * 2,
        },
      });
    }

    // Screenshot
    await screenshotDOMElement('.header-logo-invertocat', 1);
  } finally {
    // Always shut the browser down, even when a step above throws.
    await browser.close();
  }
})().catch(err => {
  // Report the failure explicitly rather than leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
'use strict';
const puppeteer = require('puppeteer');
// Launches a browser, loads github.com in a 1280x800 viewport and saves the
// page as a letter-format PDF to 'github.pdf'.
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    // Large viewport. Awaited so it completes before navigation starts and a
    // failure surfaces here instead of as a floating promise.
    await page.setViewport({ width: 1280, height: 800 });
    // NOTE(review): newer Puppeteer releases use 'networkidle0'/'networkidle2'
    // instead of 'networkidle' — confirm against the installed version.
    await page.goto('https://github.com', { waitUntil: 'networkidle' });

    // PDF
    await page.pdf({ path: 'github.pdf', format: 'letter' });
  } finally {
    // Always shut the browser down, even when a step above throws.
    await browser.close();
  }
})().catch(err => {
  // Report the failure explicitly rather than leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
Search GitHub & Extract Results
'use strict';
const puppeteer = require('puppeteer');
// Launches a browser, searches github.com for "puppeteer" and prints the text
// of every 'h3 a' link on the results page, one per line.
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    // Large viewport. Awaited so it completes before navigation starts and a
    // failure surfaces here instead of as a floating promise.
    await page.setViewport({ width: 1280, height: 800 });
    // NOTE(review): newer Puppeteer releases use 'networkidle0'/'networkidle2'
    // instead of 'networkidle' — confirm against the installed version.
    await page.goto('https://github.com', { waitUntil: 'networkidle' });

    // Click search input
    await page.click('.header-search-input');

    // Type query into search input
    // NOTE(review): newer Puppeteer releases expect page.type(selector, text)
    // — confirm against the installed version.
    await page.type('puppeteer');

    // Submit search form (the callback returns nothing, so no result is kept)
    await page.evaluate(() => {
      const searchForm = document.querySelector('form[action="/search"]');
      searchForm.submit();
    });

    // Wait for results
    await page.waitForSelector('h3 a');

    // Extract results from the page
    const links = await page.evaluate(() => {
      const anchors = Array.from(document.querySelectorAll('h3 a'));
      return anchors.map(anchor => anchor.textContent);
    });
    console.log(links.join('\n'));
  } finally {
    // Always shut the browser down, even when a step above throws.
    await browser.close();
  }
})().catch(err => {
  // Report the failure explicitly rather than leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
Start Automating!
Now that you’ve seen a few examples, you’re ready to start automating the web for fun & profit! You’re limited only by your imagination. Explore Puppeteer’s API Docs to learn more.