info.js 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497
  1. const querystring = require('querystring');
  2. const sax = require('sax');
  3. const miniget = require('miniget');
  4. const utils = require('./utils');
  5. // Forces Node JS version of setTimeout for Electron based applications
  6. const {setTimeout} = require('timers');
  7. const formatUtils = require('./format-utils');
  8. const urlUtils = require('./url-utils');
  9. const extras = require('./info-extras');
  10. const sig = require('./sig');
  11. const BASE_URL = 'https://www.youtube.com/watch?v=';
  12. // Cache for cver used in getVideoInfoPage
  13. let cver = '2.20210622.10.00';
  14. // Special error class used to determine if an error is unrecoverable,
  15. // as in, ytdl-core should not try again to fetch the video metadata.
  16. // In this case, the video is usually unavailable in some way.
  17. class UnrecoverableError extends Error {
  18. }
  19. // List of URLs that show up in `notice_url` for age restricted videos.
  20. const AGE_RESTRICTED_URLS = [
  21. 'support.google.com/youtube/?p=age_restrictions',
  22. 'youtube.com/t/community_guidelines',
  23. ];
  24. /**
  25. * Gets info from a video without getting additional formats.
  26. *
  27. * @param {string} id
  28. * @param {Object} options
  29. * @returns {Promise<Object>}
  30. */
  31. exports.getBasicInfo = async (id, options) => {
  32. if (options.IPv6Block) {
  33. options.requestOptions = Object.assign({}, options.requestOptions, {
  34. family: 6,
  35. localAddress: utils.getRandomIPv6(options.IPv6Block),
  36. });
  37. }
  38. const retryOptions = Object.assign({}, miniget.defaultOptions, options.requestOptions);
  39. options.requestOptions = Object.assign({}, options.requestOptions, {});
  40. options.requestOptions.headers = Object.assign({},
  41. {
  42. // eslint-disable-next-line max-len
  43. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.101 Safari/537.36',
  44. }, options.requestOptions.headers);
  45. const validate = info => {
  46. let playErr = utils.playError(info.player_response, ['ERROR'], UnrecoverableError);
  47. let privateErr = privateVideoError(info.player_response);
  48. if (playErr || privateErr) {
  49. throw playErr || privateErr;
  50. }
  51. return info && info.player_response && (
  52. info.player_response.streamingData || isRental(info.player_response) || isNotYetBroadcasted(info.player_response)
  53. );
  54. };
  55. let info = await pipeline([id, options], validate, retryOptions, [
  56. getWatchHTMLPage,
  57. getWatchJSONPage,
  58. getVideoInfoPage,
  59. ]);
  60. Object.assign(info, {
  61. formats: parseFormats(info.player_response),
  62. related_videos: extras.getRelatedVideos(info),
  63. });
  64. // Add additional properties to info.
  65. const media = extras.getMedia(info);
  66. const additional = {
  67. author: extras.getAuthor(info),
  68. media,
  69. likes: extras.getLikes(info),
  70. dislikes: extras.getDislikes(info),
  71. age_restricted: !!(media && AGE_RESTRICTED_URLS.some(url =>
  72. Object.values(media).some(v => typeof v === 'string' && v.includes(url)))
  73. ),
  74. // Give the standard link to the video.
  75. video_url: BASE_URL + id,
  76. storyboards: extras.getStoryboards(info),
  77. chapters: extras.getChapters(info),
  78. };
  79. info.videoDetails = extras.cleanVideoDetails(Object.assign({},
  80. info.player_response && info.player_response.microformat &&
  81. info.player_response.microformat.playerMicroformatRenderer,
  82. info.player_response && info.player_response.videoDetails, additional), info);
  83. return info;
  84. };
  85. const privateVideoError = player_response => {
  86. let playability = player_response && player_response.playabilityStatus;
  87. if (playability && playability.status === 'LOGIN_REQUIRED' && playability.messages &&
  88. playability.messages.filter(m => /This is a private video/.test(m)).length) {
  89. return new UnrecoverableError(playability.reason || (playability.messages && playability.messages[0]));
  90. } else {
  91. return null;
  92. }
  93. };
  94. const isRental = player_response => {
  95. let playability = player_response.playabilityStatus;
  96. return playability && playability.status === 'UNPLAYABLE' &&
  97. playability.errorScreen && playability.errorScreen.playerLegacyDesktopYpcOfferRenderer;
  98. };
  99. const isNotYetBroadcasted = player_response => {
  100. let playability = player_response.playabilityStatus;
  101. return playability && playability.status === 'LIVE_STREAM_OFFLINE';
  102. };
  103. const getWatchHTMLURL = (id, options) => `${BASE_URL + id}&hl=${options.lang || 'en'}`;
  104. const getWatchHTMLPageBody = (id, options) => {
  105. const url = getWatchHTMLURL(id, options);
  106. return exports.watchPageCache.getOrSet(url, () => utils.exposedMiniget(url, options).text());
  107. };
  108. const EMBED_URL = 'https://www.youtube.com/embed/';
  109. const getEmbedPageBody = (id, options) => {
  110. const embedUrl = `${EMBED_URL + id}?hl=${options.lang || 'en'}`;
  111. return utils.exposedMiniget(embedUrl, options).text();
  112. };
  113. const getHTML5player = body => {
  114. let html5playerRes =
  115. /<script\s+src="([^"]+)"(?:\s+type="text\/javascript")?\s+name="player_ias\/base"\s*>|"jsUrl":"([^"]+)"/
  116. .exec(body);
  117. return html5playerRes ? html5playerRes[1] || html5playerRes[2] : null;
  118. };
  119. const getIdentityToken = async (id, options, key, throwIfNotFound) => {
  120. let page = await getWatchHTMLPageBody(id, options);
  121. let match = page.match(/(["'])ID_TOKEN\1[:,]\s?"([^"]+)"/);
  122. if (!match && throwIfNotFound) {
  123. throw new UnrecoverableError('Cookie header used in request, but unable to find YouTube identity token');
  124. }
  125. return match && match[2];
  126. };
  127. /**
  128. * Goes through each endpoint in the pipeline, retrying on failure if the error is recoverable.
  129. * If unable to succeed with one endpoint, moves onto the next one.
  130. *
  131. * @param {Array.<Object>} args
  132. * @param {Function} validate
  133. * @param {Object} retryOptions
  134. * @param {Array.<Function>} endpoints
  135. * @returns {[Object, Object, Object]}
  136. */
  137. const pipeline = async (args, validate, retryOptions, endpoints) => {
  138. let info;
  139. for (let func of endpoints) {
  140. try {
  141. const newInfo = await retryFunc(func, args.concat([info]), retryOptions);
  142. if (newInfo.player_response) {
  143. newInfo.player_response.videoDetails = assign(
  144. info && info.player_response && info.player_response.videoDetails,
  145. newInfo.player_response.videoDetails);
  146. newInfo.player_response = assign(info && info.player_response, newInfo.player_response);
  147. }
  148. info = assign(info, newInfo);
  149. if (validate(info, false)) {
  150. break;
  151. }
  152. } catch (err) {
  153. if (err instanceof UnrecoverableError || func === endpoints[endpoints.length - 1]) {
  154. throw err;
  155. }
  156. // Unable to find video metadata... so try next endpoint.
  157. }
  158. }
  159. return info;
  160. };
  161. /**
  162. * Like Object.assign(), but ignores `null` and `undefined` from `source`.
  163. *
  164. * @param {Object} target
  165. * @param {Object} source
  166. * @returns {Object}
  167. */
  168. const assign = (target, source) => {
  169. if (!target || !source) {
  170. return target || source;
  171. }
  172. for (let [key, value] of Object.entries(source)) {
  173. if (value !== null && value !== undefined) {
  174. target[key] = value;
  175. }
  176. }
  177. return target;
  178. };
  179. /**
  180. * Given a function, calls it with `args` until it's successful,
  181. * or until it encounters an unrecoverable error.
  182. * Currently, any error from miniget is considered unrecoverable. Errors such as
  183. * too many redirects, invalid URL, status code 404, status code 502.
  184. *
  185. * @param {Function} func
  186. * @param {Array.<Object>} args
  187. * @param {Object} options
  188. * @param {number} options.maxRetries
  189. * @param {Object} options.backoff
  190. * @param {number} options.backoff.inc
  191. */
  192. const retryFunc = async (func, args, options) => {
  193. let currentTry = 0, result;
  194. while (currentTry <= options.maxRetries) {
  195. try {
  196. result = await func(...args);
  197. break;
  198. } catch (err) {
  199. if (err instanceof UnrecoverableError ||
  200. (err instanceof miniget.MinigetError && err.statusCode < 500) || currentTry >= options.maxRetries) {
  201. throw err;
  202. }
  203. let wait = Math.min(++currentTry * options.backoff.inc, options.backoff.max);
  204. await new Promise(resolve => setTimeout(resolve, wait));
  205. }
  206. }
  207. return result;
  208. };
  209. const jsonClosingChars = /^[)\]}'\s]+/;
  210. const parseJSON = (source, varName, json) => {
  211. if (!json || typeof json === 'object') {
  212. return json;
  213. } else {
  214. try {
  215. json = json.replace(jsonClosingChars, '');
  216. return JSON.parse(json);
  217. } catch (err) {
  218. throw Error(`Error parsing ${varName} in ${source}: ${err.message}`);
  219. }
  220. }
  221. };
  222. const findJSON = (source, varName, body, left, right, prependJSON) => {
  223. let jsonStr = utils.between(body, left, right);
  224. if (!jsonStr) {
  225. throw Error(`Could not find ${varName} in ${source}`);
  226. }
  227. return parseJSON(source, varName, utils.cutAfterJS(`${prependJSON}${jsonStr}`));
  228. };
  229. const findPlayerResponse = (source, info) => {
  230. const player_response = info && (
  231. (info.args && info.args.player_response) ||
  232. info.player_response || info.playerResponse || info.embedded_player_response);
  233. return parseJSON(source, 'player_response', player_response);
  234. };
  235. const getWatchJSONURL = (id, options) => `${getWatchHTMLURL(id, options)}&pbj=1`;
  236. const getWatchJSONPage = async (id, options) => {
  237. const reqOptions = Object.assign({headers: {}}, options.requestOptions);
  238. let cookie = reqOptions.headers.Cookie || reqOptions.headers.cookie;
  239. reqOptions.headers = Object.assign({
  240. 'x-youtube-client-name': '1',
  241. 'x-youtube-client-version': cver,
  242. 'x-youtube-identity-token': exports.cookieCache.get(cookie || 'browser') || '',
  243. }, reqOptions.headers);
  244. const setIdentityToken = async (key, throwIfNotFound) => {
  245. if (reqOptions.headers['x-youtube-identity-token']) {
  246. return;
  247. }
  248. reqOptions.headers['x-youtube-identity-token'] = await getIdentityToken(id, options, key, throwIfNotFound);
  249. };
  250. if (cookie) {
  251. await setIdentityToken(cookie, true);
  252. }
  253. const jsonUrl = getWatchJSONURL(id, options);
  254. const body = await utils.exposedMiniget(jsonUrl, options, reqOptions).text();
  255. let parsedBody = parseJSON('watch.json', 'body', body);
  256. if (parsedBody.reload === 'now') {
  257. await setIdentityToken('browser', false);
  258. }
  259. if (parsedBody.reload === 'now' || !Array.isArray(parsedBody)) {
  260. throw Error('Unable to retrieve video metadata in watch.json');
  261. }
  262. let info = parsedBody.reduce((part, curr) => Object.assign(curr, part), {});
  263. info.player_response = findPlayerResponse('watch.json', info);
  264. info.html5player = info.player && info.player.assets && info.player.assets.js;
  265. return info;
  266. };
  267. const getWatchHTMLPage = async (id, options) => {
  268. let body = await getWatchHTMLPageBody(id, options);
  269. let info = {page: 'watch'};
  270. try {
  271. cver = utils.between(body, '{"key":"cver","value":"', '"}');
  272. info.player_response = findJSON('watch.html', 'player_response',
  273. body, /\bytInitialPlayerResponse\s*=\s*\{/i, '</script>', '{');
  274. } catch (err) {
  275. let args = findJSON('watch.html', 'player_response', body, /\bytplayer\.config\s*=\s*{/, '</script>', '{');
  276. info.player_response = findPlayerResponse('watch.html', args);
  277. }
  278. info.response = findJSON('watch.html', 'response', body, /\bytInitialData("\])?\s*=\s*\{/i, '</script>', '{');
  279. info.html5player = getHTML5player(body);
  280. return info;
  281. };
  282. const INFO_HOST = 'www.youtube.com';
  283. const INFO_PATH = '/get_video_info';
  284. const VIDEO_EURL = 'https://youtube.googleapis.com/v/';
  285. const getVideoInfoPage = async (id, options) => {
  286. const url = new URL(`https://${INFO_HOST}${INFO_PATH}`);
  287. url.searchParams.set('video_id', id);
  288. url.searchParams.set('c', 'TVHTML5');
  289. url.searchParams.set('cver', `7${cver.substr(1)}`);
  290. url.searchParams.set('eurl', VIDEO_EURL + id);
  291. url.searchParams.set('ps', 'default');
  292. url.searchParams.set('gl', 'US');
  293. url.searchParams.set('hl', options.lang || 'en');
  294. url.searchParams.set('html5', '1');
  295. const body = await utils.exposedMiniget(url.toString(), options).text();
  296. let info = querystring.parse(body);
  297. info.player_response = findPlayerResponse('get_video_info', info);
  298. return info;
  299. };
  300. /**
  301. * @param {Object} player_response
  302. * @returns {Array.<Object>}
  303. */
  304. const parseFormats = player_response => {
  305. let formats = [];
  306. if (player_response && player_response.streamingData) {
  307. formats = formats
  308. .concat(player_response.streamingData.formats || [])
  309. .concat(player_response.streamingData.adaptiveFormats || []);
  310. }
  311. return formats;
  312. };
  313. /**
  314. * Gets info from a video additional formats and deciphered URLs.
  315. *
  316. * @param {string} id
  317. * @param {Object} options
  318. * @returns {Promise<Object>}
  319. */
  320. exports.getInfo = async (id, options) => {
  321. let info = await exports.getBasicInfo(id, options);
  322. const hasManifest =
  323. info.player_response && info.player_response.streamingData && (
  324. info.player_response.streamingData.dashManifestUrl ||
  325. info.player_response.streamingData.hlsManifestUrl
  326. );
  327. let funcs = [];
  328. if (info.formats.length) {
  329. info.html5player = info.html5player ||
  330. getHTML5player(await getWatchHTMLPageBody(id, options)) || getHTML5player(await getEmbedPageBody(id, options));
  331. if (!info.html5player) {
  332. throw Error('Unable to find html5player file');
  333. }
  334. const html5player = new URL(info.html5player, BASE_URL).toString();
  335. funcs.push(sig.decipherFormats(info.formats, html5player, options));
  336. }
  337. if (hasManifest && info.player_response.streamingData.dashManifestUrl) {
  338. let url = info.player_response.streamingData.dashManifestUrl;
  339. funcs.push(getDashManifest(url, options));
  340. }
  341. if (hasManifest && info.player_response.streamingData.hlsManifestUrl) {
  342. let url = info.player_response.streamingData.hlsManifestUrl;
  343. funcs.push(getM3U8(url, options));
  344. }
  345. let results = await Promise.all(funcs);
  346. info.formats = Object.values(Object.assign({}, ...results));
  347. info.formats = info.formats.map(formatUtils.addFormatMeta);
  348. info.formats.sort(formatUtils.sortFormats);
  349. info.full = true;
  350. return info;
  351. };
  352. /**
  353. * Gets additional DASH formats.
  354. *
  355. * @param {string} url
  356. * @param {Object} options
  357. * @returns {Promise<Array.<Object>>}
  358. */
  359. const getDashManifest = (url, options) => new Promise((resolve, reject) => {
  360. let formats = {};
  361. const parser = sax.parser(false);
  362. parser.onerror = reject;
  363. let adaptationSet;
  364. parser.onopentag = node => {
  365. if (node.name === 'ADAPTATIONSET') {
  366. adaptationSet = node.attributes;
  367. } else if (node.name === 'REPRESENTATION') {
  368. const itag = parseInt(node.attributes.ID);
  369. if (!isNaN(itag)) {
  370. formats[url] = Object.assign({
  371. itag, url,
  372. bitrate: parseInt(node.attributes.BANDWIDTH),
  373. mimeType: `${adaptationSet.MIMETYPE}; codecs="${node.attributes.CODECS}"`,
  374. }, node.attributes.HEIGHT ? {
  375. width: parseInt(node.attributes.WIDTH),
  376. height: parseInt(node.attributes.HEIGHT),
  377. fps: parseInt(node.attributes.FRAMERATE),
  378. } : {
  379. audioSampleRate: node.attributes.AUDIOSAMPLINGRATE,
  380. });
  381. }
  382. }
  383. };
  384. parser.onend = () => {
  385. resolve(formats);
  386. };
  387. const req = utils.exposedMiniget(new URL(url, BASE_URL).toString(), options);
  388. req.setEncoding('utf8');
  389. req.on('error', reject);
  390. req.on('data', chunk => {
  391. parser.write(chunk);
  392. });
  393. req.on('end', parser.close.bind(parser));
  394. });
  395. /**
  396. * Gets additional formats.
  397. *
  398. * @param {string} url
  399. * @param {Object} options
  400. * @returns {Promise<Array.<Object>>}
  401. */
  402. const getM3U8 = async (url, options) => {
  403. url = new URL(url, BASE_URL);
  404. const body = await utils.exposedMiniget(url.toString(), options).text();
  405. let formats = {};
  406. body
  407. .split('\n')
  408. .filter(line => /^https?:\/\//.test(line))
  409. .forEach(line => {
  410. const itag = parseInt(line.match(/\/itag\/(\d+)\//)[1]);
  411. formats[line] = {itag, url: line};
  412. });
  413. return formats;
  414. };
  415. // Cache get info functions.
  416. // In case a user wants to get a video's info before downloading.
  417. for (let funcName of ['getBasicInfo', 'getInfo']) {
  418. /**
  419. * @param {string} link
  420. * @param {Object} options
  421. * @returns {Promise<Object>}
  422. */
  423. const func = exports[funcName];
  424. exports[funcName] = async (link, options = {}) => {
  425. let id = await urlUtils.getVideoID(link);
  426. const key = [funcName, id, options.lang].join('-');
  427. return exports.cache.getOrSet(key, () => func(id, options));
  428. };
  429. }
  430. // Export a few helpers.
  431. exports.validateID = urlUtils.validateID;
  432. exports.validateURL = urlUtils.validateURL;
  433. exports.getURLVideoID = urlUtils.getURLVideoID;
  434. exports.getVideoID = urlUtils.getVideoID;