Build an Online YouTube Comments Scraper in Node.js and Puppeteer – Download Comments as a TXT File (Full Project for Beginners)

Build an Online YouTube Comments Scraper in Node.js and Puppeteer – Download Comments as a TXT File (Full Project for Beginners)

 

 

 

 

Welcome, folks! In this blog post we will build an online YouTube comments scraper with Node.js and Puppeteer.

 

 

Live Demo

 

 

You can see a live demo of the web application here

 

 

Features of Youtube Comments Scraper

 

 

  1. It scrapes all the comments of a YouTube video in very little time.

  2. The scraper is quite efficient.

  3. It saves all the YouTube comments in a text file and downloads it.

 

 

Get Started

 

 

To get started, create an empty directory and initialize a Node.js project inside it by running this command:

 

npm init -y

 

Then install the following Node.js modules:

 

npm i express

 

npm i ejs

 

npm i nodemon

 

npm i puppeteer

 

After installing all these modules, create an index.js file in your root directory and copy and paste the following code into it:

 

 

index.js

 

const puppeteer = require('puppeteer');
const fs = require('fs');
const express = require('express')
const path = require('path')

const bodyparser = require('body-parser')

// Express application instance that every route below is attached to.
const app = express();

// Port the HTTP server is started on.
const PORT = 5000;

// Render views through the EJS template engine.
app.set('view engine', 'ejs');

// Register body parsers for JSON and URL-encoded request bodies;
// the /getcomments handler reads the submitted URL from req.body.
const urlencodedOptions = { extended: false };
app.use(bodyparser.json());
app.use(bodyparser.urlencoded(urlencodedOptions));

// GET /youtubecommentsscraper — render the page containing the scraper form.
app.get('/youtubecommentsscraper', function renderScraperPage(req, res) {
    const viewModel = { title: "Youtube Comments Scraper" };
    res.render('youtubecommentsscraper', viewModel);
});

/**
 * GET /download?path=<file name>
 *
 * Streams a previously generated comments file to the client as a download
 * and removes it from disk afterwards.
 *
 * Query parameters:
 *   path - name of the file to download. It must look like the names the
 *          /getcomments route produces ("<digits>comments.txt"); anything
 *          else is rejected with HTTP 400.
 */
app.get("/download", (req, res) => {
    const requestedName = req.query.path;

    // The value comes straight from the query string. Joining it onto
    // __dirname unchecked would let a caller read AND delete any file the
    // process can reach (e.g. "../../etc/passwd"), so only the exact
    // file-name shape generated by this app is accepted.
    if (typeof requestedName !== "string" || !/^\d+comments\.txt$/.test(requestedName)) {
      res.status(400).send("Invalid download path");
      return;
    }

    const fullpath = path.join(__dirname, requestedName);

    res.download(fullpath, (err) => {
      // Remove the temporary file exactly once, whether or not the transfer
      // succeeded. The asynchronous unlink reports problems through its
      // callback instead of throwing inside this callback.
      fs.unlink(fullpath, (unlinkErr) => {
        if (unlinkErr && unlinkErr.code !== "ENOENT") {
          console.error("Could not remove " + fullpath, unlinkErr);
        }
      });

      if (err) {
        console.error("Download failed for " + fullpath, err);
        // The transfer may already have started; a second response can only
        // be sent while the headers are still unsent.
        if (!res.headersSent) {
          res.status(err.code === "ENOENT" ? 404 : 500).send("Unable to download the requested file");
        }
      }
    });
});
  

/**
 * Resolve after the given number of milliseconds.
 * Used in place of page.waitFor(), which is deprecated in Puppeteer and
 * absent from current releases.
 *
 * @param {number} ms - delay in milliseconds
 * @returns {Promise<void>}
 */
function sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Check that a user-supplied value is an http(s) URL on a YouTube host.
 * The value is later handed to a headless browser, so other schemes
 * (file:, chrome:, ...) and other hosts are refused.
 *
 * @param {*} value - raw value taken from the request body
 * @returns {boolean} true when the value is an acceptable YouTube URL
 */
function isYoutubeUrl(value) {
    if (typeof value !== "string") {
      return false;
    }
    let parsed;
    try {
      parsed = new URL(value);
    } catch (error) {
      return false; // not a parseable absolute URL
    }
    if (parsed.protocol !== "https:" && parsed.protocol !== "http:") {
      return false;
    }
    const host = parsed.hostname;
    return host === "youtu.be" || host === "youtube.com" || host.endsWith(".youtube.com");
}

/**
 * Extract the comment count from the text of YouTube's counter element
 * (e.g. "1,234 Comments") by keeping only the digits.
 *
 * @param {string} text - text of the counter element
 * @returns {number} the count as an integer, or 0 when no digit is present
 */
function parseCommentCount(text) {
    const digits = String(text).replace(/\D/g, "");
    return digits === "" ? 0 : Number.parseInt(digits, 10);
}

/**
 * Read the innerText of the first element matching `selector` in the page.
 *
 * @param {object} page - Puppeteer page
 * @param {string} selector - CSS selector of the element to read
 * @returns {Promise<string>} the element's innerText
 */
function getElText(page, selector) {
    return page.evaluate((sel) => document.querySelector(sel).innerText, selector);
}

/**
 * Open the video page and collect its comments as "author:text\n\n" strings.
 *
 * @param {object} browser - launched Puppeteer browser
 * @param {string} url - video URL to open
 * @returns {Promise<string[]>} one formatted entry per scraped comment
 */
async function scrapeComments(browser, url) {
    const distance = 800; // px scrolled after each comment; <= viewport height
    const delay = 2;      // ms paused after each scroll

    const page = await browser.newPage();
    await page.setViewport({ width: 1280, height: 800 });

    // timeout: 0 removes the navigation timeout. goto() itself waits for the
    // load event, so no separate waitForNavigation() promise is needed.
    await page.goto(url, { waitUntil: 'load', timeout: 0 });

    // NOTE(review): selector kept from the original code; it may need
    // updating if YouTube changes its markup.
    await page.waitForSelector('h1.title');

    // Scroll one viewport down so the comments section starts loading.
    await page.evaluate(() => {
      window.scrollBy(0, window.innerHeight);
    });
    await sleep(2000);
    await page.waitForSelector('#comments');

    const countSelector = "#count > yt-formatted-string";
    await page.waitForSelector(countSelector);
    const noOfComments = await getElText(page, countSelector);
    const totalComments = parseCommentCount(noOfComments);
    console.log(noOfComments);
    console.log(totalComments);

    const comments = [];
    // nth-child is 1-based, so the bound is inclusive to reach the last comment.
    for (let i = 1; i <= totalComments; i++) {
      const authorSelector = `.style-scope:nth-child(${i}) > #comment > #body > #main > #header > #header-author > #author-text > .style-scope`;
      const commentSelector = `.style-scope:nth-child(${i}) > #comment > #body > #main > #expander #content-text`;

      try {
        await page.waitForSelector(commentSelector);
        await page.waitForSelector(authorSelector);
      } catch (error) {
        // Deliberate: a wait that fails means no further comment was
        // rendered, which is treated as the end of the list.
        break;
      }

      const commentText = await getElText(page, commentSelector);
      const author = await getElText(page, authorSelector);

      // Scroll further down so the next batch of comments gets loaded.
      await page.evaluate((y) => { document.scrollingElement.scrollBy(0, y); }, distance);
      await sleep(delay);

      if (!commentText) {
        break;
      }
      console.log(`${author}: ${commentText}`);
      // Trim leading/trailing whitespace on every line of the comment.
      const stripped = commentText.replace(/^\s+|\s+$/gm, '');
      comments.push(author + ":" + stripped + "\n\n");
    }
    return comments;
}

/**
 * POST /getcomments
 *
 * Body: { url } - the YouTube video URL to scrape.
 * Responds with { path } - the name of the generated text file, to be passed
 * to /download. Responds 400 for an unacceptable URL and 500 when scraping
 * fails.
 *
 * Failures are handled per request with try/catch/finally. No process-wide
 * "unhandledRejection" listener is registered here, since adding one per
 * request would accumulate listeners for the lifetime of the process.
 */
app.post('/getcomments', async (req, res) => {
    const url = req.body && req.body.url;
    if (!isYoutubeUrl(url)) {
      res.status(400).json({ error: "A valid YouTube video URL is required" });
      return;
    }

    let browser;
    try {
      browser = await puppeteer.launch({ headless: true });
      const comments = await scrapeComments(browser, url);

      // Written next to this script because /download resolves names against
      // __dirname. Declared locally so concurrent requests cannot overwrite
      // each other's file name.
      const outputFilePath = Date.now() + "comments.txt";
      await fs.promises.writeFile(path.join(__dirname, outputFilePath), comments.join(""));

      res.json({
        path: outputFilePath
      });
    } catch (error) {
      console.error("Failed to scrape comments:", error);
      if (!res.headersSent) {
        res.status(500).json({ error: "Unable to scrape comments" });
      }
    } finally {
      // Always release the browser, including after a failure.
      if (browser) {
        await browser.close().catch((closeError) => {
          console.error("Failed to close browser:", closeError);
        });
      }
    }
});
  
  // Start the HTTP server on PORT and log a message once it is listening.
  app.listen(PORT, function onListening() {
    const banner = `App is listening on Port ${PORT}`;
    console.log(banner);
  });

 

See also  Node.js Project to Get Instagram Profile Image and Posts Using Instagram-Links Library in Javascript Full Tutorial For Beginners

 

 

Now create a views folder inside the root directory, create a youtubecommentsscraper.ejs file inside it, and copy and paste the following code into that file:

 

 

views/youtubecommentsscraper.ejs

 

See also  Top FFMPEG Books to Read or Buy For Programmers to Build RealTime Applications in Javascript

 

<!DOCTYPE html>
<html>
  <head>
    <%# View for the scraper form. Expects a "title" local from the route that renders it. %>
    <title><%=title%></title>
    <link
      rel="stylesheet"
      href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css"
    />
  </head>
  <body>
      <div class="container">
          <h1 class="text-center">
              Youtube Comments Scraper
          </h1>
          <form id="form">
            <div class="form-group">
                <%# The label's "for" must equal the input's id so the two are associated. %>
                <label for="url">Youtube URL:</label>
                <input class="form-control" type="text" name="file" id="url" placeholder="Enter Youtube URL" required>
            </div>
            <div class="form-group">
                <button id="button" type="submit" class="btn btn-block btn-danger">
                    Get Comments
                </button>
            </div>
        </form>
        <br><br>

      </div>

    <%# Scripts belong inside <body>; markup after the closing body tag is invalid HTML. %>
    <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js"></script>
    <script>
      // Submit the URL to the server, then download the generated text file.
      $("#form").submit(function (e) {
          e.preventDefault();

          var $button = $("#button");
          $button.text("Getting Comments Please Wait").prop("disabled", true);

          $.ajax({
              method: "POST",
              url: "/getcomments",
              data: { url: $("#url").val() }
          })
          .done(function (data) {
              console.log(data.path);
              // Navigate to the download URL instead of window.open(): a window
              // opened from an asynchronous callback is liable to be stopped by
              // popup blockers. The file name is encoded because it is placed
              // in a query string.
              window.location.href = "/download?path=" + encodeURIComponent(data.path);
              $("#form")[0].reset();
          })
          .fail(function () {
              alert("Unable to fetch the comments. Please check the URL and try again.");
          })
          .always(function () {
              // Restore the button after success and failure alike, so the
              // form never stays locked.
              $button.text("Get Comments").prop("disabled", false);
          });
      });
    </script>
  </body>
</html>

 

 

 

Screenshot

 

 

 

 

 

 

DOWNLOAD SOURCE CODE

 

 

 

Leave a Reply