Warm tip: This article is reproduced from serverfault.com, please click

Asynchronous function recognizes function is done when really it still has more to do

发布于 2020-12-04 04:36:12

The async for loop at the bottom treats the page.on call as finished and moves on to run the function again, but it's not actually done: it still needs to run everything up to page.close. How do I let the async function know that it is done after page.close, not after page.on? Let me know if you need any more info, thanks.

const puppeteer = require('puppeteer');
const fs = require('fs');
const req = require('request');
const got = require('got');
const NodeID3 = require('node-id3');
const readline = require('readline');
const selectors = require('./selectors');

/**
 * Opens `url` in a new page of `browser` and registers a 'request' listener
 * that reacts to requests whose URL contains the media base URL.
 *
 * NOTE: the promise returned by this function settles as soon as the listener
 * has been registered. page.on() is a callback-based event listener, not a
 * promise, so a caller that awaits this function does not wait for the
 * download, the tagging, or page.close() performed inside the listener.
 *
 * @param {string} url - Address of the page to open.
 * @param {object} browser - Object providing `newPage()`; the caller passes
 *   the result of `puppeteer.launch()`.
 * @returns {Promise<undefined>} Resolves with no value once setup has finished.
 */
const getDownloadUrl = async (url, browser) => {
    const page = await browser.newPage();
    await page.goto(url);
    // NOTE(review): this call is not awaited, and interception is switched on
    // only after page.goto() has already completed — confirm that is intended.
    page.setRequestInterception(true);
    // Sends Page.setDownloadBehavior so downloads are allowed and saved under ./Songs.
    await page._client.send('Page.setDownloadBehavior', {behavior: 'allow', downloadPath: './Songs'})

    // Requests whose URL contains this prefix are treated as the song media request.
    const baseUrl = 'https://cf-hls-media.sndcdn.com/media/';

    // The `await` here does not wait for the handler: the callback below runs
    // later, whenever the page emits a 'request' event.
    await page.on('request', async (request) => {
        if(request.url().includes(baseUrl)){
            // Matching request: build the download URL, read the song info,
            // start the download (not awaited), tag the song, then abort the
            // intercepted request and close the page.
            const downloadUrl = fixUrl(request.url());
            const info = await getSongInfo(page);
            downloadSong(downloadUrl, info.title);
            await tagSong(info);
            await request.abort();
            await page.close();
        } else {
            // Every other request is handed to request.continue().
            request.continue();
        }
    });
};

const fixUrl = (url) => {
   ...
};

const downloadSong = (url, title) => {
   ...
};

const getSongInfo = async (page) => {
   ...
};

const tagSong = async (info) => {
   ...
};

// Entry point: gather every line of the CSV file as a URL and, once the line
// reader emits 'close', launch a browser and pass the URLs to getDownloadUrl
// one at a time.
(() => {
    const urls = [];

    const csvStream = fs.createReadStream('../Song Urls.csv');
    const lineReader = readline.createInterface({
        input: csvStream,
        output: process.stdout,
        console: false,
        terminal: false,
    });

    // Each line is stored exactly as read.
    lineReader.on('line', (line) => {
        urls.push(line);
    });

    lineReader.on('close', async () => {
        const browser = await puppeteer.launch({headless: false});

        // Sequential on purpose: each call is awaited before the next one starts.
        for (const url of urls) {
            await getDownloadUrl(url, browser);
        }
    });
})();

/*
Issue: The loop treats each getDownloadUrl call as finished even though its
request handler is still running, and moves on to the next URL anyway.
*/
Questioner
ethans33
Viewed
0
Nicholas Tower 2020-12-04 13:07:59

await only works with promises, and page.on looks to be a callback-based event listener, not something that returns a promise. If you want to be able to await it, you will need to create a promise around it.

// page.on() is a callback-based event listener, not a promise, so wrap it in a
// promise that settles only after the matching request has been fully handled
// (download started, song tagged, request aborted, page closed).
await new Promise((resolve, reject) => {
  page.on('request', async (request) => {
    if (request.url().includes(baseUrl)) {
      try {
        const downloadUrl = fixUrl(request.url());
        const info = await getSongInfo(page);
        downloadSong(downloadUrl, info.title);
        await tagSong(info);
        await request.abort();
        await page.close();
        resolve();
      } catch (err) {
        // Without this, a failure in any step above would leave the promise
        // pending forever and the awaiting loop would never advance.
        reject(err);
      }
    } else {
      request.continue();
    }
  });
});