Node.js Puppeteer Project Scraping Youtube Comments of a Youtube Video Full Tutorial For Beginners

Node.js Puppeteer Project Scraping Youtube Comments of a Youtube Video Full Tutorial For Beginners

Welcome folks! In this tutorial we will build a simple Node.js project that scrapes all the comments of a YouTube video and stores them in a text file using the Puppeteer scraping tool. All the source code is given below, and a step-by-step YouTube video is also shown below.

 

 

 

 

npm i puppeteer

 

This is the only dependency we need for this project. After installing it, create the app.js file, which will be the starting point of the application. In this file we will write a simple Puppeteer script to open the link of a YouTube video.

 

const puppeteer = require('puppeteer');
// Built-in Node.js module; used in the later steps to write comments.txt.
const fs = require('fs');

// Entry point: launch a visible browser, open a tab, run the scraping code, close the browser.
(async () => {
  // headless: false opens a real browser window so the run can be watched.
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();
  await page.setViewport({ width: 1280, height: 800 });

  // No navigation has been started at this point, so there is nothing to wait
  // for yet: awaiting page.waitForNavigation() here could only end in a timeout.

  // Write your code here

  await browser.close();
})();

 

Now we will tell Puppeteer to navigate to a specific URL.

// Start listening for the navigation before triggering it. The wait uses the
// same "no timeout" setting as goto, so a slow page load cannot reject this
// promise while nothing is awaiting it yet.
const navigationPromise = page.waitForNavigation({ timeout: 0 });
await page.goto('https://www.youtube.com/watch?v=A23O4aUftXk', {
  // Consider the navigation finished once the load event has fired.
  waitUntil: 'load',
  // Remove the timeout
  timeout: 0,
});

 

Now we will register a handler for Node's `unhandledRejection` process event. When a promise rejection goes unhandled anywhere in the script, this handler fires: it logs the error, writes all the comments collected so far in the comments array to comments.txt, and then closes the browser.

 

// Safety net: when a promise rejection goes unhandled, log it, save whatever
// has been collected in `comments` so far, and then close the browser.
process.on("unhandledRejection", (reason, promise) => {
  console.error("Unhandled Rejection at: Promise", promise, "reason:", reason);

  // Entries are concatenated as-is; no extra separator is inserted between them.
  const collectedText = comments.join("");
  fs.writeFileSync('comments.txt', collectedText);

  browser.close();
});

 

For this you need to declare the comments array globally, right at the top of the application.

// Buffer of scraped comment entries; declared at the top so that both the
// scraping loop and the unhandledRejection handler can reach it.
const comments = []

 

See also  jQuery Input Mask Form Fields Example For Validating Phone Number,Date & Credit Cards in Browser Full Project For Beginners

Now we will wait for the necessary items to be loaded on the page

// Wait until the video title element is present on the page.
await page.waitForSelector('h1.title');

// Scroll down by one viewport height (presumably this is what makes the
// comments section start loading — confirm against the live page).
await page.evaluate(() => {
  window.scrollBy(0, window.innerHeight);
});
// Plain timer instead of page.waitFor(), a deprecated API that newer Puppeteer
// releases no longer provide.
await new Promise((resolve) => setTimeout(resolve, 2000));
await page.waitForSelector('#comments');

// Element that displays the total number of comments.
const commentSelector = "#count > yt-formatted-string";
await page.waitForSelector(commentSelector);
// getElText and filterNum are the helper functions defined in the full source code below.
const noOfComments = await getElText(page, commentSelector);
console.log(noOfComments);

// filterNum keeps digits, "." and ","; drop the thousands separators as well so
// that e.g. "1,234" becomes "1234" and can be compared with the loop counter.
const correctComments = filterNum(noOfComments).replaceAll(',', '');
console.log(correctComments);

 

In this block of code we wait for the comment count to be displayed on the screen and for the comments section to load properly.

Now we will loop through all the comments of the video and push each comment to the comments array. Finally, we will save that array to the comments.txt file using the fs module, which is built into Node.js.

 

const distance = 400; // should be less than or equal to window.innerHeight
const delay = 50; // pause in milliseconds after every scroll step

// The count arrives as text; strip thousands separators and convert it once so
// the loop compares numbers with numbers.
const totalComments = Number(String(correctComments).replace(/,/g, ''));

// :nth-child() is 1-based, so walk 1..totalComments inclusive; stopping at
// `i < total` would skip the last comment.
for (let i = 1; i <= totalComments; i++) {
  console.log(i);
  const authorSelector = `.style-scope:nth-child(${i}) > #comment > #body > #main > #header > #header-author > #author-text > .style-scope`;
  console.log(authorSelector);
  const commentSelector = `.style-scope:nth-child(${i}) > #comment > #body > #main > #expander #content-text`;

  try {
    await page.waitForSelector(commentSelector);
    await page.waitForSelector(authorSelector);
  } catch (err) {
    // A timeout means no further comment was rendered at this position
    // (presumably the displayed count can exceed the reachable comments).
    // Stop scraping and keep what was collected; rethrow anything else.
    if (err.name !== 'TimeoutError') throw err;
    break;
  }

  const commentText = await getElText(page, commentSelector);
  const author = await getElText(page, authorSelector);
  console.log(commentText);
  console.log(author);

  // Scroll further down so the next comments get rendered.
  await page.evaluate((y) => { document.scrollingElement.scrollBy(0, y); }, distance);
  // Plain timer instead of page.waitFor(), a deprecated API that newer
  // Puppeteer releases no longer provide.
  await new Promise((resolve) => setTimeout(resolve, delay));

  if (commentText) {
    // write each comment to DB or file
    // or batch the for processing later
    console.log(`${author}: ${commentText}`);
    // Trim leading/trailing whitespace on every line of the comment.
    const stripped = commentText.replace(/^\s+|\s+$/gm, '');
    comments.push(author + ":" + stripped + "\n\n");
  }
}

// Each entry already ends with a blank line, so join without a separator.
const formattedComments = comments.join("");

fs.writeFileSync('comments.txt', formattedComments);

 

Full Source Code

 

const puppeteer = require('puppeteer');
const fs = require('fs');

(async () => {

  // Buffer of formatted "author:comment" entries, shared with the rejection handler below.
  const comments = [];
  // headless: false opens a real browser window so the run can be watched.
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();

  // Safety net: when a promise rejection goes unhandled, log it, save whatever
  // has been collected so far, and close the browser.
  process.on("unhandledRejection", (reason, p) => {
    console.error("Unhandled Rejection at: Promise", p, "reason:", reason);
    const formattedComments = comments.join("");

    fs.writeFileSync('comments.txt', formattedComments);
    browser.close();
  });
  await page.setViewport({ width: 1280, height: 800 });

  // Start listening for the navigation before triggering it. The wait uses the
  // same "no timeout" setting as goto, so a slow page load cannot reject this
  // promise while nothing is awaiting it yet.
  const navigationPromise = page.waitForNavigation({ timeout: 0 });
  await page.goto('###youryoutubeurl####', {
    // Consider the navigation finished once the load event has fired.
    waitUntil: 'load',
    // Remove the timeout
    timeout: 0,
  });

  // Wait until the video title element is present on the page.
  await page.waitForSelector('h1.title');

/**
 * Reads the innerText of the first element matching a CSS selector.
 *
 * The lookup runs through page.evaluate, so `document` is resolved by whatever
 * executes the callback. If nothing matches, accessing innerText on the null
 * result throws and the returned promise rejects.
 *
 * @param {object} page - Object exposing `evaluate(fn, ...args)`.
 * @param {string} selector - CSS selector of the element to read.
 * @returns {Promise<string>} The element's innerText.
 */
async function getElText(page, selector) {
  const readInnerText = (cssQuery) => {
    const element = document.querySelector(cssQuery);
    return element.innerText;
  };

  const text = await page.evaluate(readInnerText, selector);
  return text;
}

/**
 * Strips every character that is not a digit, "." or "," from a string.
 *
 * @param {string} str - Raw text, e.g. "1,234 Comments".
 * @returns {string} The remaining characters in their original order, e.g. "1,234".
 */
const filterNum = (str) => str.replace(/[^0-9.,]/g, "");

// Scroll down by one viewport height (presumably this is what makes the
// comments section start loading — confirm against the live page).
await page.evaluate(() => {
  window.scrollBy(0, window.innerHeight);
});
// Plain timer instead of page.waitFor(), a deprecated API that newer Puppeteer
// releases no longer provide.
await new Promise((resolve) => setTimeout(resolve, 2000));
await page.waitForSelector('#comments');

// Element that displays the total number of comments.
const commentSelector = "#count > yt-formatted-string";
await page.waitForSelector(commentSelector);
const noOfComments = await getElText(page, commentSelector);
console.log(noOfComments);

// filterNum keeps digits, "." and ","; drop the thousands separators as well so
// that e.g. "1,234" becomes "1234" and can be compared with the loop counter.
const correctComments = filterNum(noOfComments).replaceAll(',', '');
console.log(correctComments);





  await navigationPromise;

  // Write your code here

  const distance = 800; // should be less than or equal to window.innerHeight
  const delay = 2; // pause in milliseconds after every scroll step

  // The count arrives as text; strip thousands separators and convert it once
  // so the loop compares numbers with numbers.
  const totalComments = Number(String(correctComments).replace(/,/g, ''));

  // :nth-child() is 1-based, so walk 1..totalComments inclusive; stopping at
  // `i < total` would skip the last comment.
  for (let i = 1; i <= totalComments; i++) {
    console.log(i);
    const authorSelector = `.style-scope:nth-child(${i}) > #comment > #body > #main > #header > #header-author > #author-text > .style-scope`;
    console.log(authorSelector);
    const commentSelector = `.style-scope:nth-child(${i}) > #comment > #body > #main > #expander #content-text`;

    try {
      await page.waitForSelector(commentSelector);
      await page.waitForSelector(authorSelector);
    } catch (err) {
      // A timeout means no further comment was rendered at this position
      // (presumably the displayed count can exceed the reachable comments).
      // Stop scraping and keep what was collected; rethrow anything else.
      if (err.name !== 'TimeoutError') throw err;
      break;
    }

    const commentText = await getElText(page, commentSelector);
    const author = await getElText(page, authorSelector);
    console.log(commentText);
    console.log(author);

    // Scroll further down so the next comments get rendered.
    await page.evaluate((y) => { document.scrollingElement.scrollBy(0, y); }, distance);
    // Plain timer instead of page.waitFor(), a deprecated API that newer
    // Puppeteer releases no longer provide.
    await new Promise((resolve) => setTimeout(resolve, delay));

    if (commentText) {
      // write each comment to DB or file
      // or batch the for processing later
      console.log(`${author}: ${commentText}`);
      // Trim leading/trailing whitespace on every line of the comment.
      const stripped = commentText.replace(/^\s+|\s+$/gm, '');
      comments.push(author + ":" + stripped + "\n\n");
    }
  }

  // Each entry already ends with a blank line, so join without a separator.
  const formattedComments = comments.join("");

  fs.writeFileSync('comments.txt', formattedComments);

  await browser.close();
})()

 

See also  Build a Unit Weight Converter (Kilograms + Pounds + Ounces + Grams) in Javascript Full Project For Beginners

 

DOWNLOAD SOURCE CODE

 

Leave a Reply