// youtube_extractor.js
// YouTube video information extractor
  /**
   * Sends an HTTP request and resolves with the response body and headers.
   *
   * When `platform` is `"WEB"` the request goes through `fetch` in CORS mode;
   * for any other value it is delegated to the host-provided `AF.request`
   * bridge, which reports its result through a callback.
   *
   * Declared as a function (rather than assigned to an undeclared identifier)
   * so the script also loads in strict mode and ES modules, where such an
   * assignment throws a ReferenceError.
   *
   * @param {string} method - HTTP method.
   * @param {string} url - Request URL.
   * @param {*} [data=null] - Request body, passed through unchanged.
   * @param {Object} [headers={}] - Request headers.
   * @param {*} requestId - Forwarded to `AF.request`; not used on WEB.
   * @param {string} platform - `"WEB"` selects `fetch`; anything else selects `AF.request`.
   * @returns {Promise<{data: *, headers: *}>} On WEB, `data` is the response
   *   text and `headers` is the `headers` property of the fetch response. On
   *   the bridge path, `data` is the callback's first argument and `headers`
   *   is the callback's second argument after `JSON.parse`.
   * @throws Rejects with the bridge's error value, with the `JSON.parse`
   *   error when the bridge returns unparsable headers, or with whatever
   *   `fetch` rejects with.
   */
  async function request(method, url, data = null, headers = {}, requestId, platform) {
    console.log(`request url:${url}`);
    console.log(`request data:${data}`);
    console.log(`request method:${method}`);
    console.log(`request headers:${JSON.stringify((headers))}`);

    if (platform === "WEB") {
      const res = await fetch(url, {
        mode: 'cors',
        method,
        headers,
        body: data,
      });
      const resData = await res.text();
      return {
        data: resData,
        headers: res.headers,
      };
    }

    return new Promise((resolve, reject) => {
      AF.request(url, method, data, headers, requestId, (responseData, responseHeaders, err) => {
        if (err) {
          console.log(`request error: ${err}`);
          reject(err);
          return;
        }
        console.log(`response headers: ${responseHeaders}`);
        // The callback may run after the executor has returned; an exception
        // thrown here would escape the promise and leave it pending forever,
        // so a header parse failure is turned into a rejection.
        let parsedHeaders;
        try {
          parsedHeaders = JSON.parse(responseHeaders);
        } catch (parseError) {
          reject(parseError);
          return;
        }
        resolve({
          data: responseData,
          headers: parsedHeaders,
        });
      });
    });
  }
  /**
   * Extracts the video and audio codec strings from a format's `mimeType`.
   *
   * The `codecs="..."` parameter is split on commas; each entry is classified
   * by the part before its first `.`. The first entry of each kind wins.
   *
   * Declared as a function (rather than assigned to an undeclared identifier)
   * so the script also loads in strict mode and ES modules.
   *
   * @param {{mimeType?: string}} format - Object carrying a `mimeType` such as
   *   `video/mp4; codecs="avc1.64001F, mp4a.40.2"`.
   * @returns {{vcodec?: (string|null), acodec?: (string|null)}}
   *   - `{}` when there is no `mimeType`, no `codecs` parameter, or nothing
   *     could be classified and the list does not have exactly two entries;
   *   - `{vcodec, acodec}` with `null` for a missing kind when at least one
   *     entry was recognised;
   *   - the two entries positionally (video first) when neither was
   *     recognised but exactly two were listed.
   */
  function parseCodecs(format) {
    // `vp09`/`vp08`/`av1` are accepted next to `vp9`/`vp8`/`av01` so that
    // strings such as `vp09.00.51.08` are classified as video.
    const videoFamilies = new Set([
      'avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp09', 'vp8', 'vp08', 'hev1', 'hev2',
      'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'av1', 'theora',
    ]);
    const audioFamilies = new Set([
      'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3',
      'dtsc', 'dtse', 'dtsh', 'dtsl',
    ]);

    const mimeType = format?.mimeType;
    if (!mimeType) {
      return {};
    }

    const match = mimeType.match(/(?<mimetype>[^/]+\/[^;]+)(?:;\s*codecs="?(?<codecs>[^"]+))?/);
    const codecList = match?.groups.codecs;
    if (!codecList) {
      return {};
    }

    const entries = codecList
      .trim()
      .replace(/,$/, '')
      .split(',')
      .map((entry) => entry.trim())
      .filter(Boolean);

    let vcodec = null;
    let acodec = null;
    for (const fullCodec of entries) {
      const family = fullCodec.split('.')[0];
      if (videoFamilies.has(family)) {
        if (!vcodec) {
          vcodec = fullCodec;
        }
      } else if (audioFamilies.has(family)) {
        if (!acodec) {
          acodec = fullCodec;
        }
      } else {
        console.log(`WARNING: Unknown codec ${fullCodec}`);
      }
    }

    if (vcodec || acodec) {
      return { vcodec, acodec };
    }
    // Nothing recognised: with exactly two entries, fall back to the
    // positional "video, audio" order.
    if (entries.length === 2) {
      return { vcodec: entries[0], acodec: entries[1] };
    }
    return {};
  }
  /**
   * Downloads the player JavaScript and extracts the source text of the
   * signature function and the n-code function.
   *
   * Successful results are memoised per `playerUrl` on
   * `extractDecryptFunction.cache`; failures are not cached.
   *
   * @param {string} playerUrl - URL of the player script.
   * @param {*} requestId - Forwarded to `request`.
   * @param {string} platform - Forwarded to `request`.
   * @returns {Promise<{signatureFunction: string, ncodeFunction: string}>}
   *   Each value is the matched `<name>=function(...){...}` source text.
   * @throws {Error} When a function name or definition cannot be located in
   *   the player script, or when the download fails.
   */
  async function extractDecryptFunction(playerUrl, requestId, platform) {
    // Cache lives on the function object itself.
    const cache = extractDecryptFunction.cache || (extractDecryptFunction.cache = {});
    const cacheKey = `jsFunction:${playerUrl}`;
    if (cache[cacheKey]) {
      console.log(`从缓存获取解密函数: ${playerUrl}`);
      return cache[cacheKey];
    }

    const playerResp = await request('GET', playerUrl, null, {}, requestId, platform);
    const playerJs = playerResp.data;

    // Returns the requested capture group, or fails with a message that says
    // what was being looked for instead of a TypeError on `null`.
    const capture = (pattern, group, description) => {
      const found = playerJs.match(pattern);
      if (!found) {
        throw new Error(`extractDecryptFunction: ${description} not found in ${playerUrl}`);
      }
      return found[group];
    };

    // Identifiers may contain `$`, which is a regex metacharacter, so names
    // must be escaped before being embedded in a pattern.
    const escapeForRegExp = (text) => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

    // Name of the function called with arguments in the `c && d.set(..., (...) => NAME(` call site.
    const signatureFunctionName = capture(
      /\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*=>\s*([a-zA-Z$_][a-zA-Z$_0-9]*)\(/,
      1,
      'signature function name',
    );
    // Name of the function called without arguments: `... => NAME()`.
    const ncodeFunctionName = capture(
      /\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*=>\s*([a-zA-Z$_][a-zA-Z$_0-9]*)\(\)/,
      1,
      'ncode function name',
    );

    // The lookbehind keeps a short name from matching the tail of a longer
    // identifier. The body match stops at the first `}`.
    const signatureFunction = capture(
      new RegExp(`(?<![\\w$])${escapeForRegExp(signatureFunctionName)}=function\\(\\w+\\)\\{[^\\}]+\\}`),
      0,
      'signature function definition',
    );
    const ncodeFunction = capture(
      new RegExp(`(?<![\\w$])${escapeForRegExp(ncodeFunctionName)}=function\\(\\)\\{[^\\}]+\\}`),
      0,
      'ncode function definition',
    );

    const result = {
      signatureFunction,
      ncodeFunction,
    };
    cache[cacheKey] = result;
    return result;
  }
  /**
   * Evaluates function source of the form `<name>=function(...){...}` and
   * returns the result of calling that function with `input`.
   *
   * The leading `<name>=` is dropped, the remaining function expression is
   * materialised, and it is then invoked. (Previously the wrapper returned
   * the inner function without calling it, so callers received a function
   * object instead of its result.)
   *
   * SECURITY: this runs code through the `Function` constructor. `code` must
   * only ever come from a trusted source; review before widening its use.
   *
   * @param {string} code - Source text starting with `<name>=function`.
   * @param {*} input - Single argument passed to the evaluated function.
   * @returns {*} Whatever the evaluated function returns.
   * @throws {TypeError} When `code` is not a string of the expected form, so
   *   that arbitrary statements are never executed as a function body.
   */
  function executeDecryptFunction(code, input) {
    const assignmentPrefix = /^[^=]+=function/;
    if (typeof code !== 'string' || !assignmentPrefix.test(code)) {
      throw new TypeError('executeDecryptFunction: expected source of the form "<name>=function(...){...}"');
    }
    const factory = new Function(code.replace(assignmentPrefix, 'return function'));
    const decryptFunction = factory();
    return decryptFunction(input);
  }
  /**
   * Deciphers an encrypted signature using the functions extracted from the
   * player script, and evaluates the n-code function with an empty string.
   *
   * This is best-effort: any failure is logged and the encrypted signature is
   * returned unchanged together with an empty `ncode`, so this function never
   * rejects.
   *
   * Declared as a function (rather than assigned to an undeclared identifier)
   * so the script also loads in strict mode and ES modules.
   *
   * @param {string} signatureEncrypted - Encrypted signature value.
   * @param {string} playerUrl - Player script URL, forwarded to `extractDecryptFunction`.
   * @param {*} requestId - Forwarded to `extractDecryptFunction`.
   * @param {string} platform - Forwarded to `extractDecryptFunction`.
   * @returns {Promise<{signature: string, ncode: string}>}
   */
  async function decryptSignature(signatureEncrypted, playerUrl, requestId, platform) {
    try {
      const { signatureFunction, ncodeFunction } = await extractDecryptFunction(playerUrl, requestId, platform);

      const signature = executeDecryptFunction(signatureFunction, signatureEncrypted);
      const ncode = executeDecryptFunction(ncodeFunction, '');

      // Both values are interpolated into a URL by the caller; anything that
      // is not a string would be stringified into garbage, so treat it as a
      // failure and use the fallback below.
      if (typeof signature !== 'string' || typeof ncode !== 'string') {
        throw new TypeError('decrypt functions did not return strings');
      }

      return { signature, ncode };
    } catch (e) {
      console.error('签名解密失败:', e);
      return {
        signature: signatureEncrypted,
        ncode: '',
      };
    }
  }
  /**
   * Returns the source text of the JSON object literal assigned by
   * `var <variableName> = {...}` inside `html`, or `null` when absent.
   *
   * Braces are balanced while skipping over double-quoted strings, so a `};`
   * inside a string value does not cut the object short.
   */
  function extractEmbeddedJson(html, variableName) {
    const declaration = new RegExp(`var ${variableName}\\s*=\\s*\\{`).exec(html);
    if (!declaration) {
      return null;
    }
    const start = declaration.index + declaration[0].length - 1;
    let depth = 0;
    let inString = false;
    for (let i = start; i < html.length; i += 1) {
      const ch = html[i];
      if (inString) {
        if (ch === '\\') {
          i += 1; // skip the escaped character
        } else if (ch === '"') {
          inString = false;
        }
      } else if (ch === '"') {
        inString = true;
      } else if (ch === '{') {
        depth += 1;
      } else if (ch === '}') {
        depth -= 1;
        if (depth === 0) {
          return html.slice(start, i + 1);
        }
      }
    }
    return null;
  }

  /**
   * Finds the player script path (`PLAYER_JS_URL` or `jsUrl`) in `html` and
   * returns it as an absolute URL, or `null` when the page does not carry one.
   */
  function resolvePlayerUrl(html) {
    const match = html.match(/"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"/);
    if (!match) {
      return null;
    }
    // The value may be JSON-escaped (`\/s\/player\/...`).
    const path = match[1].replace(/\\\//g, '/');
    if (/^https?:\/\//.test(path)) {
      return path;
    }
    if (path.startsWith('//')) {
      return `https:${path}`;
    }
    return `https://www.youtube.com${path}`;
  }

  /**
   * Maps the `compactVideoRenderer` entries of the watch-next secondary
   * results in `ytInitialData` to the recommendation records returned by
   * `detail`. Entries without a `videoId` are skipped.
   */
  function collectRecommendations(ytInitialData) {
    const results = ytInitialData.contents?.twoColumnWatchNextResults?.secondaryResults?.secondaryResults?.results || [];
    return results
      .map((item) => item.compactVideoRenderer)
      .filter((video) => video && video.videoId)
      .map((video) => ({
        type: "gridVideoRenderer",
        videoId: video.videoId,
        title: video.title?.simpleText,
        thumbnails: video.thumbnail?.thumbnails,
        channelName: video.longBylineText?.runs?.[0]?.text,
        publishedTimeText: video.publishedTimeText?.simpleText,
        viewCountText: video.viewCountText?.simpleText,
        shortViewCountText: video.shortViewCountText?.simpleText,
        lengthText: video.lengthText?.simpleText,
      }));
  }

  /**
   * Fetches a watch page and builds the video detail payload from the
   * `ytInitialPlayerResponse` / `ytInitialData` objects embedded in its HTML.
   *
   * Format selection, as implemented below:
   *   - formats whose `height` is 720 or more are skipped;
   *   - only formats for which `parseCodecs` yields both a video and an audio
   *     codec are kept;
   *   - at most one format per `qualityLabel` is kept (first one wins);
   *   - a format without `url` but with `signatureCipher` gets its URL from
   *     the cipher's `url` plus the deciphered signature;
   *   - the result is sorted by ascending height.
   *
   * Declared as a function (rather than assigned to an undeclared identifier)
   * so the script also loads in strict mode and ES modules.
   *
   * @param {string} url - Watch page URL.
   * @param {*} requestId - Echoed in the result and forwarded to `request`.
   * @param {string} platform - Forwarded to `request`.
   * @returns {Promise<Object>} On success
   *   `{code: 200, msg: "", requestId, data: {videoDetails, streamingData: {formats}}, id}`;
   *   on any failure `{code: -1, msg: <error text>, requestId}`. Never rejects.
   */
  async function detail(url, requestId, platform) {
    try {
      // Fetch the watch page HTML.
      const separator = url.includes('?') ? '&' : '?';
      const htmlResp = await request('GET', `${url}${separator}bpctr=9999999999&has_verified=1`, null, {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-us,en;q=0.5',
        'Sec-Fetch-Mode': 'navigate',
        'Accept-Encoding': 'gzip, deflate, br',
      }, requestId, platform);
      const html = htmlResp.data;

      // Parse the initial player response.
      const playerJson = extractEmbeddedJson(html, 'ytInitialPlayerResponse');
      if (!playerJson) {
        throw new Error('无法找到播放器数据');
      }
      const ytInitialPlayerResponse = JSON.parse(playerJson);
      const originVideoDetails = ytInitialPlayerResponse['videoDetails'];

      // Recommended videos are optional: no embedded data means an empty list.
      const initialDataJson = extractEmbeddedJson(html, 'ytInitialData');
      const recommendInfo = initialDataJson ? collectRecommendations(JSON.parse(initialDataJson)) : [];

      // Build the playable format list.
      const formats = [];
      const seenQualities = new Set();
      const streamingData = ytInitialPlayerResponse.streamingData;
      const allFormats = [
        ...(streamingData.formats || []),
        ...(streamingData.adaptiveFormats || []),
      ];
      // Resolved once; it is the same for every format on the page.
      const playerUrl = resolvePlayerUrl(html);

      for (const format of allFormats) {
        if (!format) {
          continue;
        }
        if (format.height && Number.parseInt(format.height, 10) >= 720) {
          continue;
        }
        if (seenQualities.has(format.qualityLabel)) {
          continue;
        }
        // Checked before the URL is resolved so that no signature is
        // deciphered for a format that would be dropped anyway.
        const { vcodec, acodec } = parseCodecs(format);
        if (!vcodec || !acodec) {
          continue;
        }

        let finalUrl = format.url;
        if (!finalUrl && format.signatureCipher) {
          const cipher = new URLSearchParams(format.signatureCipher);
          const baseUrl = cipher.get('url');
          const encryptedSignature = cipher.get('s');
          if (baseUrl && encryptedSignature && playerUrl) {
            const { signature, ncode } = await decryptSignature(encryptedSignature, playerUrl, requestId, platform);
            // `sp` names the query parameter that carries the signature.
            const signatureParam = cipher.get('sp') || 'sig';
            finalUrl = `${baseUrl}&${signatureParam}=${signature}`;
            if (ncode) {
              finalUrl += `&n=${ncode}`;
            }
          }
        }
        if (!finalUrl) {
          continue;
        }

        formats.push({
          width: String(format.width),
          height: String(format.height),
          type: format.mimeType,
          quality: format.qualityLabel,
          itag: format.itag,
          fps: String(format.fps),
          bitrate: String(format.bitrate),
          ext: "mp4",
          vcodec,
          acodec,
          vbr: "0",
          abr: "0",
          container: "mp4_dash",
          from: "web",
          // Use the resolved URL so deciphered formats are actually playable.
          url: finalUrl,
          videoUrl: "",
          audioUrl: "",
        });
        seenQualities.add(format.qualityLabel);
      }

      // Sort by ascending height.
      formats.sort((a, b) => Number.parseInt(a.height, 10) - Number.parseInt(b.height, 10));

      // Build the thumbnail list (dimensions as strings).
      const thumbnails = originVideoDetails.thumbnail.thumbnails.map((item) => ({
        url: item.url,
        width: String(item.width),
        height: String(item.height),
      }));

      const videoDetails = {
        isLiveContent: originVideoDetails.isLiveContent,
        title: originVideoDetails.title,
        thumbnails,
        description: originVideoDetails.shortDescription,
        lengthSeconds: originVideoDetails.lengthSeconds,
        viewCount: originVideoDetails.viewCount,
        keywords: originVideoDetails.keywords,
        author: originVideoDetails.author,
        channelID: originVideoDetails.channelId,
        recommendInfo,
        channelURL: `https://www.youtube.com/channel/${originVideoDetails.channelId}`,
        // Prefer the id reported by the player response; stripping the URL
        // prefix is only correct when the URL has no extra query parameters.
        videoId: originVideoDetails.videoId ?? url.replace('https://www.youtube.com/watch?v=', ''),
      };

      return {
        code: 200,
        msg: "",
        requestId,
        data: {
          videoDetails,
          streamingData: {
            formats,
          },
        },
        id: "MusicDetailViewModel_detail_url",
      };
    } catch (e) {
      console.error(e);
      return {
        code: -1,
        msg: e.toString(),
        requestId,
      };
    }
  }