123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497 |
- const querystring = require('querystring');
- const sax = require('sax');
- const miniget = require('miniget');
- const utils = require('./utils');
- // Forces Node JS version of setTimeout for Electron based applications
- const {setTimeout} = require('timers');
- const formatUtils = require('./format-utils');
- const urlUtils = require('./url-utils');
- const extras = require('./info-extras');
- const sig = require('./sig');
- const BASE_URL = 'https://www.youtube.com/watch?v=';
- // Cache for cver used in getVideoInfoPage
- let cver = '2.20210622.10.00';
- // Special error class used to determine if an error is unrecoverable,
- // as in, ytdl-core should not try again to fetch the video metadata.
- // In this case, the video is usually unavailable in some way.
- class UnrecoverableError extends Error {
- }
- // List of URLs that show up in `notice_url` for age restricted videos.
- const AGE_RESTRICTED_URLS = [
- 'support.google.com/youtube/?p=age_restrictions',
- 'youtube.com/t/community_guidelines',
- ];
- /**
- * Gets info from a video without getting additional formats.
- *
- * @param {string} id
- * @param {Object} options
- * @returns {Promise<Object>}
- */
- exports.getBasicInfo = async (id, options) => {
- if (options.IPv6Block) {
- options.requestOptions = Object.assign({}, options.requestOptions, {
- family: 6,
- localAddress: utils.getRandomIPv6(options.IPv6Block),
- });
- }
- const retryOptions = Object.assign({}, miniget.defaultOptions, options.requestOptions);
- options.requestOptions = Object.assign({}, options.requestOptions, {});
- options.requestOptions.headers = Object.assign({},
- {
- // eslint-disable-next-line max-len
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.101 Safari/537.36',
- }, options.requestOptions.headers);
- const validate = info => {
- let playErr = utils.playError(info.player_response, ['ERROR'], UnrecoverableError);
- let privateErr = privateVideoError(info.player_response);
- if (playErr || privateErr) {
- throw playErr || privateErr;
- }
- return info && info.player_response && (
- info.player_response.streamingData || isRental(info.player_response) || isNotYetBroadcasted(info.player_response)
- );
- };
- let info = await pipeline([id, options], validate, retryOptions, [
- getWatchHTMLPage,
- getWatchJSONPage,
- getVideoInfoPage,
- ]);
- Object.assign(info, {
- formats: parseFormats(info.player_response),
- related_videos: extras.getRelatedVideos(info),
- });
- // Add additional properties to info.
- const media = extras.getMedia(info);
- const additional = {
- author: extras.getAuthor(info),
- media,
- likes: extras.getLikes(info),
- dislikes: extras.getDislikes(info),
- age_restricted: !!(media && AGE_RESTRICTED_URLS.some(url =>
- Object.values(media).some(v => typeof v === 'string' && v.includes(url)))
- ),
- // Give the standard link to the video.
- video_url: BASE_URL + id,
- storyboards: extras.getStoryboards(info),
- chapters: extras.getChapters(info),
- };
- info.videoDetails = extras.cleanVideoDetails(Object.assign({},
- info.player_response && info.player_response.microformat &&
- info.player_response.microformat.playerMicroformatRenderer,
- info.player_response && info.player_response.videoDetails, additional), info);
- return info;
- };
- const privateVideoError = player_response => {
- let playability = player_response && player_response.playabilityStatus;
- if (playability && playability.status === 'LOGIN_REQUIRED' && playability.messages &&
- playability.messages.filter(m => /This is a private video/.test(m)).length) {
- return new UnrecoverableError(playability.reason || (playability.messages && playability.messages[0]));
- } else {
- return null;
- }
- };
- const isRental = player_response => {
- let playability = player_response.playabilityStatus;
- return playability && playability.status === 'UNPLAYABLE' &&
- playability.errorScreen && playability.errorScreen.playerLegacyDesktopYpcOfferRenderer;
- };
- const isNotYetBroadcasted = player_response => {
- let playability = player_response.playabilityStatus;
- return playability && playability.status === 'LIVE_STREAM_OFFLINE';
- };
- const getWatchHTMLURL = (id, options) => `${BASE_URL + id}&hl=${options.lang || 'en'}`;
- const getWatchHTMLPageBody = (id, options) => {
- const url = getWatchHTMLURL(id, options);
- return exports.watchPageCache.getOrSet(url, () => utils.exposedMiniget(url, options).text());
- };
- const EMBED_URL = 'https://www.youtube.com/embed/';
- const getEmbedPageBody = (id, options) => {
- const embedUrl = `${EMBED_URL + id}?hl=${options.lang || 'en'}`;
- return utils.exposedMiniget(embedUrl, options).text();
- };
- const getHTML5player = body => {
- let html5playerRes =
- /<script\s+src="([^"]+)"(?:\s+type="text\/javascript")?\s+name="player_ias\/base"\s*>|"jsUrl":"([^"]+)"/
- .exec(body);
- return html5playerRes ? html5playerRes[1] || html5playerRes[2] : null;
- };
- const getIdentityToken = async (id, options, key, throwIfNotFound) => {
- let page = await getWatchHTMLPageBody(id, options);
- let match = page.match(/(["'])ID_TOKEN\1[:,]\s?"([^"]+)"/);
- if (!match && throwIfNotFound) {
- throw new UnrecoverableError('Cookie header used in request, but unable to find YouTube identity token');
- }
- return match && match[2];
- };
- /**
- * Goes through each endpoint in the pipeline, retrying on failure if the error is recoverable.
- * If unable to succeed with one endpoint, moves onto the next one.
- *
- * @param {Array.<Object>} args
- * @param {Function} validate
- * @param {Object} retryOptions
- * @param {Array.<Function>} endpoints
- * @returns {[Object, Object, Object]}
- */
- const pipeline = async (args, validate, retryOptions, endpoints) => {
- let info;
- for (let func of endpoints) {
- try {
- const newInfo = await retryFunc(func, args.concat([info]), retryOptions);
- if (newInfo.player_response) {
- newInfo.player_response.videoDetails = assign(
- info && info.player_response && info.player_response.videoDetails,
- newInfo.player_response.videoDetails);
- newInfo.player_response = assign(info && info.player_response, newInfo.player_response);
- }
- info = assign(info, newInfo);
- if (validate(info, false)) {
- break;
- }
- } catch (err) {
- if (err instanceof UnrecoverableError || func === endpoints[endpoints.length - 1]) {
- throw err;
- }
- // Unable to find video metadata... so try next endpoint.
- }
- }
- return info;
- };
- /**
- * Like Object.assign(), but ignores `null` and `undefined` from `source`.
- *
- * @param {Object} target
- * @param {Object} source
- * @returns {Object}
- */
- const assign = (target, source) => {
- if (!target || !source) {
- return target || source;
- }
- for (let [key, value] of Object.entries(source)) {
- if (value !== null && value !== undefined) {
- target[key] = value;
- }
- }
- return target;
- };
- /**
- * Given a function, calls it with `args` until it's successful,
- * or until it encounters an unrecoverable error.
- * Currently, any error from miniget is considered unrecoverable. Errors such as
- * too many redirects, invalid URL, status code 404, status code 502.
- *
- * @param {Function} func
- * @param {Array.<Object>} args
- * @param {Object} options
- * @param {number} options.maxRetries
- * @param {Object} options.backoff
- * @param {number} options.backoff.inc
- */
- const retryFunc = async (func, args, options) => {
- let currentTry = 0, result;
- while (currentTry <= options.maxRetries) {
- try {
- result = await func(...args);
- break;
- } catch (err) {
- if (err instanceof UnrecoverableError ||
- (err instanceof miniget.MinigetError && err.statusCode < 500) || currentTry >= options.maxRetries) {
- throw err;
- }
- let wait = Math.min(++currentTry * options.backoff.inc, options.backoff.max);
- await new Promise(resolve => setTimeout(resolve, wait));
- }
- }
- return result;
- };
- const jsonClosingChars = /^[)\]}'\s]+/;
- const parseJSON = (source, varName, json) => {
- if (!json || typeof json === 'object') {
- return json;
- } else {
- try {
- json = json.replace(jsonClosingChars, '');
- return JSON.parse(json);
- } catch (err) {
- throw Error(`Error parsing ${varName} in ${source}: ${err.message}`);
- }
- }
- };
- const findJSON = (source, varName, body, left, right, prependJSON) => {
- let jsonStr = utils.between(body, left, right);
- if (!jsonStr) {
- throw Error(`Could not find ${varName} in ${source}`);
- }
- return parseJSON(source, varName, utils.cutAfterJS(`${prependJSON}${jsonStr}`));
- };
- const findPlayerResponse = (source, info) => {
- const player_response = info && (
- (info.args && info.args.player_response) ||
- info.player_response || info.playerResponse || info.embedded_player_response);
- return parseJSON(source, 'player_response', player_response);
- };
- const getWatchJSONURL = (id, options) => `${getWatchHTMLURL(id, options)}&pbj=1`;
- const getWatchJSONPage = async (id, options) => {
- const reqOptions = Object.assign({headers: {}}, options.requestOptions);
- let cookie = reqOptions.headers.Cookie || reqOptions.headers.cookie;
- reqOptions.headers = Object.assign({
- 'x-youtube-client-name': '1',
- 'x-youtube-client-version': cver,
- 'x-youtube-identity-token': exports.cookieCache.get(cookie || 'browser') || '',
- }, reqOptions.headers);
- const setIdentityToken = async (key, throwIfNotFound) => {
- if (reqOptions.headers['x-youtube-identity-token']) {
- return;
- }
- reqOptions.headers['x-youtube-identity-token'] = await getIdentityToken(id, options, key, throwIfNotFound);
- };
- if (cookie) {
- await setIdentityToken(cookie, true);
- }
- const jsonUrl = getWatchJSONURL(id, options);
- const body = await utils.exposedMiniget(jsonUrl, options, reqOptions).text();
- let parsedBody = parseJSON('watch.json', 'body', body);
- if (parsedBody.reload === 'now') {
- await setIdentityToken('browser', false);
- }
- if (parsedBody.reload === 'now' || !Array.isArray(parsedBody)) {
- throw Error('Unable to retrieve video metadata in watch.json');
- }
- let info = parsedBody.reduce((part, curr) => Object.assign(curr, part), {});
- info.player_response = findPlayerResponse('watch.json', info);
- info.html5player = info.player && info.player.assets && info.player.assets.js;
- return info;
- };
- const getWatchHTMLPage = async (id, options) => {
- let body = await getWatchHTMLPageBody(id, options);
- let info = {page: 'watch'};
- try {
- cver = utils.between(body, '{"key":"cver","value":"', '"}');
- info.player_response = findJSON('watch.html', 'player_response',
- body, /\bytInitialPlayerResponse\s*=\s*\{/i, '</script>', '{');
- } catch (err) {
- let args = findJSON('watch.html', 'player_response', body, /\bytplayer\.config\s*=\s*{/, '</script>', '{');
- info.player_response = findPlayerResponse('watch.html', args);
- }
- info.response = findJSON('watch.html', 'response', body, /\bytInitialData("\])?\s*=\s*\{/i, '</script>', '{');
- info.html5player = getHTML5player(body);
- return info;
- };
- const INFO_HOST = 'www.youtube.com';
- const INFO_PATH = '/get_video_info';
- const VIDEO_EURL = 'https://youtube.googleapis.com/v/';
- const getVideoInfoPage = async (id, options) => {
- const url = new URL(`https://${INFO_HOST}${INFO_PATH}`);
- url.searchParams.set('video_id', id);
- url.searchParams.set('c', 'TVHTML5');
- url.searchParams.set('cver', `7${cver.substr(1)}`);
- url.searchParams.set('eurl', VIDEO_EURL + id);
- url.searchParams.set('ps', 'default');
- url.searchParams.set('gl', 'US');
- url.searchParams.set('hl', options.lang || 'en');
- url.searchParams.set('html5', '1');
- const body = await utils.exposedMiniget(url.toString(), options).text();
- let info = querystring.parse(body);
- info.player_response = findPlayerResponse('get_video_info', info);
- return info;
- };
- /**
- * @param {Object} player_response
- * @returns {Array.<Object>}
- */
- const parseFormats = player_response => {
- let formats = [];
- if (player_response && player_response.streamingData) {
- formats = formats
- .concat(player_response.streamingData.formats || [])
- .concat(player_response.streamingData.adaptiveFormats || []);
- }
- return formats;
- };
- /**
- * Gets info from a video additional formats and deciphered URLs.
- *
- * @param {string} id
- * @param {Object} options
- * @returns {Promise<Object>}
- */
- exports.getInfo = async (id, options) => {
- let info = await exports.getBasicInfo(id, options);
- const hasManifest =
- info.player_response && info.player_response.streamingData && (
- info.player_response.streamingData.dashManifestUrl ||
- info.player_response.streamingData.hlsManifestUrl
- );
- let funcs = [];
- if (info.formats.length) {
- info.html5player = info.html5player ||
- getHTML5player(await getWatchHTMLPageBody(id, options)) || getHTML5player(await getEmbedPageBody(id, options));
- if (!info.html5player) {
- throw Error('Unable to find html5player file');
- }
- const html5player = new URL(info.html5player, BASE_URL).toString();
- funcs.push(sig.decipherFormats(info.formats, html5player, options));
- }
- if (hasManifest && info.player_response.streamingData.dashManifestUrl) {
- let url = info.player_response.streamingData.dashManifestUrl;
- funcs.push(getDashManifest(url, options));
- }
- if (hasManifest && info.player_response.streamingData.hlsManifestUrl) {
- let url = info.player_response.streamingData.hlsManifestUrl;
- funcs.push(getM3U8(url, options));
- }
- let results = await Promise.all(funcs);
- info.formats = Object.values(Object.assign({}, ...results));
- info.formats = info.formats.map(formatUtils.addFormatMeta);
- info.formats.sort(formatUtils.sortFormats);
- info.full = true;
- return info;
- };
- /**
- * Gets additional DASH formats.
- *
- * @param {string} url
- * @param {Object} options
- * @returns {Promise<Array.<Object>>}
- */
- const getDashManifest = (url, options) => new Promise((resolve, reject) => {
- let formats = {};
- const parser = sax.parser(false);
- parser.onerror = reject;
- let adaptationSet;
- parser.onopentag = node => {
- if (node.name === 'ADAPTATIONSET') {
- adaptationSet = node.attributes;
- } else if (node.name === 'REPRESENTATION') {
- const itag = parseInt(node.attributes.ID);
- if (!isNaN(itag)) {
- formats[url] = Object.assign({
- itag, url,
- bitrate: parseInt(node.attributes.BANDWIDTH),
- mimeType: `${adaptationSet.MIMETYPE}; codecs="${node.attributes.CODECS}"`,
- }, node.attributes.HEIGHT ? {
- width: parseInt(node.attributes.WIDTH),
- height: parseInt(node.attributes.HEIGHT),
- fps: parseInt(node.attributes.FRAMERATE),
- } : {
- audioSampleRate: node.attributes.AUDIOSAMPLINGRATE,
- });
- }
- }
- };
- parser.onend = () => {
- resolve(formats);
- };
- const req = utils.exposedMiniget(new URL(url, BASE_URL).toString(), options);
- req.setEncoding('utf8');
- req.on('error', reject);
- req.on('data', chunk => {
- parser.write(chunk);
- });
- req.on('end', parser.close.bind(parser));
- });
- /**
- * Gets additional formats.
- *
- * @param {string} url
- * @param {Object} options
- * @returns {Promise<Array.<Object>>}
- */
- const getM3U8 = async (url, options) => {
- url = new URL(url, BASE_URL);
- const body = await utils.exposedMiniget(url.toString(), options).text();
- let formats = {};
- body
- .split('\n')
- .filter(line => /^https?:\/\//.test(line))
- .forEach(line => {
- const itag = parseInt(line.match(/\/itag\/(\d+)\//)[1]);
- formats[line] = {itag, url: line};
- });
- return formats;
- };
- // Cache get info functions.
- // In case a user wants to get a video's info before downloading.
- for (let funcName of ['getBasicInfo', 'getInfo']) {
- /**
- * @param {string} link
- * @param {Object} options
- * @returns {Promise<Object>}
- */
- const func = exports[funcName];
- exports[funcName] = async (link, options = {}) => {
- let id = await urlUtils.getVideoID(link);
- const key = [funcName, id, options.lang].join('-');
- return exports.cache.getOrSet(key, () => func(id, options));
- };
- }
- // Export a few helpers.
- exports.validateID = urlUtils.validateID;
- exports.validateURL = urlUtils.validateURL;
- exports.getURLVideoID = urlUtils.getURLVideoID;
- exports.getVideoID = urlUtils.getVideoID;
|