Source: lib/text/vtt_text_parser.js

  1. /**
  2. * @license
  3. * Copyright 2016 Google Inc.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. goog.provide('shaka.text.VttTextParser');
  18. goog.require('goog.asserts');
  19. goog.require('shaka.log');
  20. goog.require('shaka.text.Cue');
  21. goog.require('shaka.text.CueRegion');
  22. goog.require('shaka.text.TextEngine');
  23. goog.require('shaka.util.Error');
  24. goog.require('shaka.util.StringUtils');
  25. goog.require('shaka.util.TextParser');
/**
 * Text parser for WebVTT content.
 *
 * The constructor body is empty, so instances carry no state of their own;
 * parsing is done by parseMedia together with the static helpers attached to
 * shaka.text.VttTextParser.
 *
 * @constructor
 * @implements {shakaExtern.TextParser}
 */
shaka.text.VttTextParser = function() {};
/**
 * Init segments are not expected for WebVTT. This method does no parsing; it
 * only raises an assertion failure to flag the unexpected call.
 *
 * @override
 */
shaka.text.VttTextParser.prototype.parseInit = function(data) {
  goog.asserts.assert(false, 'VTT does not have init segments');
};
  35. /**
  36. * @override
  37. * @throws {shaka.util.Error}
  38. */
  39. shaka.text.VttTextParser.prototype.parseMedia = function(data, time) {
  40. const VttTextParser = shaka.text.VttTextParser;
  41. // Get the input as a string. Normalize newlines to \n.
  42. let str = shaka.util.StringUtils.fromUTF8(data);
  43. str = str.replace(/\r\n|\r(?=[^\n]|$)/gm, '\n');
  44. let blocks = str.split(/\n{2,}/m);
  45. if (!/^WEBVTT($|[ \t\n])/m.test(blocks[0])) {
  46. throw new shaka.util.Error(
  47. shaka.util.Error.Severity.CRITICAL,
  48. shaka.util.Error.Category.TEXT,
  49. shaka.util.Error.Code.INVALID_TEXT_HEADER);
  50. }
  51. let offset = time.segmentStart;
  52. if (offset == null) {
  53. // This is a probe, such as the HLS parser makes. We don't know the segment
  54. // start time, so we will use the X-TIMESTAMP-MAP header, if present, to get
  55. // the segment start time. By only doing this when segmentStart == null, we
  56. // protect against rollover in the MPEGTS field.
  57. // In case the attempt below doesn't work out, assume an offset of 0.
  58. offset = 0;
  59. if (blocks[0].indexOf('X-TIMESTAMP-MAP') >= 0) {
  60. // https://goo.gl/m7eVn9
  61. // The 'X-TIMESTAMP-MAP' header is used in HLS to align text with
  62. // the rest of the media.
  63. // The header format is 'X-TIMESTAMP-MAP=MPEGTS:n,LOCAL:m'
  64. // (the attributes can go in any order)
  65. // where n is MPEG-2 time and m is cue time it maps to.
  66. // For example 'X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:900000'
  67. // means an offset of 10 seconds
  68. // 900000/MPEG_TIMESCALE - cue time.
  69. let cueTimeMatch =
  70. blocks[0].match(/LOCAL:((?:(\d{1,}):)?(\d{2}):(\d{2})\.(\d{3}))/m);
  71. let mpegTimeMatch = blocks[0].match(/MPEGTS:(\d+)/m);
  72. if (cueTimeMatch && mpegTimeMatch) {
  73. let parser = new shaka.util.TextParser(cueTimeMatch[1]);
  74. let cueTime = shaka.text.VttTextParser.parseTime_(parser);
  75. let mpegTime = Number(mpegTimeMatch[1]);
  76. const mpegTimescale = shaka.text.VttTextParser.MPEG_TIMESCALE_;
  77. // Apple-encoded HLS content uses absolute timestamps, so assume the
  78. // presence of the map tag means the content uses absolute timestamps.
  79. offset = time.periodStart + (mpegTime / mpegTimescale - cueTime);
  80. }
  81. }
  82. }
  83. // Parse VTT regions.
  84. /* !Array.<!shakaExtern.CueRegion> */
  85. let regions = [];
  86. let lines = blocks[0].split('\n');
  87. for (let i = 1; i < lines.length; i++) {
  88. if (/^Region:/.test(lines[i])) {
  89. let region = VttTextParser.parseRegion_(lines[i]);
  90. regions.push(region);
  91. }
  92. }
  93. // Parse cues.
  94. let ret = [];
  95. for (let i = 1; i < blocks.length; i++) {
  96. lines = blocks[i].split('\n');
  97. let cue = VttTextParser.parseCue_(lines, offset, regions);
  98. if (cue) {
  99. ret.push(cue);
  100. }
  101. }
  102. return ret;
  103. };
  104. /**
  105. * Parses a string into a Region object.
  106. *
  107. * @param {string} text
  108. * @return {!shakaExtern.CueRegion}
  109. * @private
  110. */
  111. shaka.text.VttTextParser.parseRegion_ = function(text) {
  112. const VttTextParser = shaka.text.VttTextParser;
  113. let parser = new shaka.util.TextParser(text);
  114. // The region string looks like this:
  115. // Region: id=fred width=50% lines=3 regionanchor=0%,100%
  116. // viewportanchor=10%,90% scroll=up
  117. let region = new shaka.text.CueRegion();
  118. // Skip 'Region:'
  119. parser.readWord();
  120. parser.skipWhitespace();
  121. let word = parser.readWord();
  122. while (word) {
  123. if (!VttTextParser.parseRegionSetting_(region, word)) {
  124. shaka.log.warning('VTT parser encountered an invalid VTTRegion setting: ',
  125. word,
  126. ' The setting will be ignored.');
  127. }
  128. parser.skipWhitespace();
  129. word = parser.readWord();
  130. }
  131. return region;
  132. };
  133. /**
  134. * Parses a text block into a Cue object.
  135. *
  136. * @param {!Array.<string>} text
  137. * @param {number} timeOffset
  138. * @param {!Array.<!shakaExtern.CueRegion>} regions
  139. * @return {shaka.text.Cue}
  140. * @private
  141. */
  142. shaka.text.VttTextParser.parseCue_ = function(text, timeOffset, regions) {
  143. const VttTextParser = shaka.text.VttTextParser;
  144. // Skip empty blocks.
  145. if (text.length == 1 && !text[0]) {
  146. return null;
  147. }
  148. // Skip comment blocks.
  149. if (/^NOTE($|[ \t])/.test(text[0])) {
  150. return null;
  151. }
  152. // Skip style blocks.
  153. if (text[0] == 'STYLE') {
  154. return null;
  155. }
  156. let id = null;
  157. let index = text[0].indexOf('-->');
  158. if (index < 0) {
  159. id = text[0];
  160. text.splice(0, 1);
  161. }
  162. // Parse the times.
  163. let parser = new shaka.util.TextParser(text[0]);
  164. let start = VttTextParser.parseTime_(parser);
  165. let expect = parser.readRegex(/[ \t]+-->[ \t]+/g);
  166. let end = VttTextParser.parseTime_(parser);
  167. if (start == null || expect == null || end == null) {
  168. throw new shaka.util.Error(
  169. shaka.util.Error.Severity.CRITICAL,
  170. shaka.util.Error.Category.TEXT,
  171. shaka.util.Error.Code.INVALID_TEXT_CUE);
  172. }
  173. start += timeOffset;
  174. end += timeOffset;
  175. // Get the payload.
  176. let payload = text.slice(1).join('\n').trim();
  177. let cue = new shaka.text.Cue(start, end, payload);
  178. // Parse optional settings.
  179. parser.skipWhitespace();
  180. let word = parser.readWord();
  181. while (word) {
  182. if (!VttTextParser.parseCueSetting(cue, word, regions)) {
  183. shaka.log.warning('VTT parser encountered an invalid VTT setting: ',
  184. word,
  185. ' The setting will be ignored.');
  186. }
  187. parser.skipWhitespace();
  188. word = parser.readWord();
  189. }
  190. if (id != null) {
  191. cue.id = id;
  192. }
  193. return cue;
  194. };
  195. /**
  196. * Parses a WebVTT setting from the given word.
  197. *
  198. * @param {!shaka.text.Cue} cue
  199. * @param {string} word
  200. * @param {!Array.<!shaka.text.CueRegion>} regions
  201. * @return {boolean} True on success.
  202. */
  203. shaka.text.VttTextParser.parseCueSetting = function(cue, word, regions) {
  204. const VttTextParser = shaka.text.VttTextParser;
  205. let results = null;
  206. if ((results = /^align:(start|middle|center|end|left|right)$/.exec(word))) {
  207. VttTextParser.setTextAlign_(cue, results[1]);
  208. } else if ((results = /^vertical:(lr|rl)$/.exec(word))) {
  209. VttTextParser.setVerticalWritingDirection_(cue, results[1]);
  210. } else if ((results = /^size:([\d.]+)%$/.exec(word))) {
  211. cue.size = Number(results[1]);
  212. } else if ((results =
  213. /^position:([\d.]+)%(?:,(line-left|line-right|center|start|end))?$/
  214. .exec(word))) {
  215. cue.position = Number(results[1]);
  216. if (results[2]) {
  217. VttTextParser.setPositionAlign_(cue, results[2]);
  218. }
  219. } else if ((results = /^region:(.*)$/.exec(word))) {
  220. let region = VttTextParser.getRegionById_(regions, results[1]);
  221. if (region) {
  222. cue.region = region;
  223. }
  224. } else {
  225. return VttTextParser.parsedLineValueAndInterpretation_(cue, word);
  226. }
  227. return true;
  228. };
  229. /**
  230. *
  231. * @param {!Array.<!shaka.text.CueRegion>} regions
  232. * @param {string} id
  233. * @return {?shaka.text.CueRegion}
  234. * @private
  235. */
  236. shaka.text.VttTextParser.getRegionById_ = function(regions, id) {
  237. let regionsWithId = regions.filter(function(region) {
  238. return region.id == id;
  239. });
  240. if (!regionsWithId.length) {
  241. shaka.log.warning('VTT parser could not find a region with id: ',
  242. id,
  243. ' The region will be ignored.');
  244. return null;
  245. }
  246. goog.asserts.assert(regionsWithId.length == 1,
  247. 'VTTRegion ids should be unique!');
  248. return regionsWithId[0];
  249. };
  250. /**
  251. * Parses a WebVTTRegion setting from the given word.
  252. *
  253. * @param {!shaka.text.CueRegion} region
  254. * @param {string} word
  255. * @return {boolean} True on success.
  256. * @private
  257. */
  258. shaka.text.VttTextParser.parseRegionSetting_ = function(region, word) {
  259. let results = null;
  260. if ((results = /^id=(.*)$/.exec(word))) {
  261. region.id = results[1];
  262. } else if ((results = /^width=(\d{1,2}|100)%$/.exec(word))) {
  263. region.width = Number(results[1]);
  264. } else if ((results = /^lines=(\d+)$/.exec(word))) {
  265. region.height = Number(results[1]);
  266. region.heightUnits = shaka.text.CueRegion.units.LINES;
  267. } else if ((results = /^regionanchor=(\d{1,2}|100)%,(\d{1,2}|100)%$/
  268. .exec(word))) {
  269. region.regionAnchorX = Number(results[1]);
  270. region.regionAnchorY = Number(results[2]);
  271. } else if ((results = /^viewportanchor=(\d{1,2}|100)%,(\d{1,2}|100)%$/
  272. .exec(word))) {
  273. region.viewportAnchorX = Number(results[1]);
  274. region.viewportAnchorY = Number(results[2]);
  275. } else if ((results = /^scroll=up$/.exec(word))) {
  276. region.scroll = shaka.text.CueRegion.scrollMode.UP;
  277. } else {
  278. return false;
  279. }
  280. return true;
  281. };
  282. /**
  283. * @param {!shaka.text.Cue} cue
  284. * @param {string} align
  285. * @private
  286. */
  287. shaka.text.VttTextParser.setTextAlign_ = function(cue, align) {
  288. const Cue = shaka.text.Cue;
  289. if (align == 'middle') {
  290. cue.textAlign = Cue.textAlign.CENTER;
  291. } else {
  292. goog.asserts.assert(align.toUpperCase() in Cue.textAlign,
  293. align.toUpperCase() +
  294. ' Should be in Cue.textAlign values!');
  295. cue.textAlign = Cue.textAlign[align.toUpperCase()];
  296. }
  297. };
  298. /**
  299. * @param {!shaka.text.Cue} cue
  300. * @param {string} align
  301. * @private
  302. */
  303. shaka.text.VttTextParser.setPositionAlign_ = function(cue, align) {
  304. const Cue = shaka.text.Cue;
  305. if (align == 'line-left' || align == 'start') {
  306. cue.positionAlign = Cue.positionAlign.LEFT;
  307. } else if (align == 'line-right' || align == 'end') {
  308. cue.positionAlign = Cue.positionAlign.RIGHT;
  309. } else {
  310. cue.positionAlign = Cue.positionAlign.CENTER;
  311. }
  312. };
  313. /**
  314. * @param {!shaka.text.Cue} cue
  315. * @param {string} value
  316. * @private
  317. */
  318. shaka.text.VttTextParser.setVerticalWritingDirection_ = function(cue, value) {
  319. const Cue = shaka.text.Cue;
  320. if (value == 'lr') {
  321. cue.writingDirection = Cue.writingDirection.VERTICAL_LEFT_TO_RIGHT;
  322. } else {
  323. cue.writingDirection = Cue.writingDirection.VERTICAL_RIGHT_TO_LEFT;
  324. }
  325. };
  326. /**
  327. * @param {!shaka.text.Cue} cue
  328. * @param {string} word
  329. * @return {boolean}
  330. * @private
  331. */
  332. shaka.text.VttTextParser.parsedLineValueAndInterpretation_ =
  333. function(cue, word) {
  334. const Cue = shaka.text.Cue;
  335. let results = null;
  336. if ((results = /^line:([\d.]+)%(?:,(start|end|center))?$/.exec(word))) {
  337. cue.lineInterpretation = Cue.lineInterpretation.PERCENTAGE;
  338. cue.line = Number(results[1]);
  339. if (results[2]) {
  340. goog.asserts.assert(results[2].toUpperCase() in Cue.lineAlign,
  341. results[2].toUpperCase() +
  342. ' Should be in Cue.lineAlign values!');
  343. cue.lineAlign = Cue.lineAlign[results[2].toUpperCase()];
  344. }
  345. } else if ((results = /^line:(-?\d+)(?:,(start|end|center))?$/.exec(word))) {
  346. cue.lineInterpretation = Cue.lineInterpretation.LINE_NUMBER;
  347. cue.line = Number(results[1]);
  348. if (results[2]) {
  349. goog.asserts.assert(results[2].toUpperCase() in Cue.lineAlign,
  350. results[2].toUpperCase() +
  351. ' Should be in Cue.lineAlign values!');
  352. cue.lineAlign = Cue.lineAlign[results[2].toUpperCase()];
  353. }
  354. } else {
  355. return false;
  356. }
  357. return true;
  358. };
  359. /**
  360. * Parses a WebVTT time from the given parser.
  361. *
  362. * @param {!shaka.util.TextParser} parser
  363. * @return {?number}
  364. * @private
  365. */
  366. shaka.text.VttTextParser.parseTime_ = function(parser) {
  367. // 00:00.000 or 00:00:00.000 or 0:00:00.000
  368. let results = parser.readRegex(/(?:(\d{1,}):)?(\d{2}):(\d{2})\.(\d{3})/g);
  369. if (results == null) {
  370. return null;
  371. }
  372. // This capture is optional, but will still be in the array as undefined,
  373. // in which case it is 0.
  374. let hours = Number(results[1]) || 0;
  375. let minutes = Number(results[2]);
  376. let seconds = Number(results[3]);
  377. let miliseconds = Number(results[4]);
  378. if (minutes > 59 || seconds > 59) {
  379. return null;
  380. }
  381. return (miliseconds / 1000) + seconds + (minutes * 60) + (hours * 3600);
  382. };
/**
 * Number of MPEG-2 timestamp units per second. parseMedia divides the MPEGTS
 * value of an X-TIMESTAMP-MAP header by this constant to convert it to
 * seconds (for example, 900000 / 90000 = 10 seconds).
 *
 * @const {number}
 * @private
 */
shaka.text.VttTextParser.MPEG_TIMESCALE_ = 90000;
  388. shaka.text.TextEngine.registerParser(
  389. 'text/vtt',
  390. shaka.text.VttTextParser);
  391. shaka.text.TextEngine.registerParser(
  392. 'text/vtt; codecs="vtt"',
  393. shaka.text.VttTextParser);