Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor WebVTT building logic into WebVTT::Builder #4070

Merged
merged 7 commits into from
Oct 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions spec/helpers/vtt/builder_spec.cr
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
require "../../spec_helper.cr"

# Fixture cues shared by every example below: two back-to-back one-second
# lines, each carrying the start time, end time and text handed to `vtt.cue`.
MockLines = [
{
"start_time": Time::Span.new(seconds: 1),
"end_time": Time::Span.new(seconds: 2),
"text": "Line 1",
},

{
"start_time": Time::Span.new(seconds: 2),
"end_time": Time::Span.new(seconds: 3),
"text": "Line 2",
},
]

Spectator.describe "WebVTT::Builder" do
  # Without setting fields the header is only the "WEBVTT" line followed by a blank line.
  it "correctly builds a vtt file" do
    expected_document = [
      "WEBVTT",
      "",
      "00:00:01.000 --> 00:00:02.000",
      "Line 1",
      "",
      "00:00:02.000 --> 00:00:03.000",
      "Line 2",
      "",
      "",
    ].join('\n')

    built_document = WebVTT.build do |builder|
      MockLines.each do |mock|
        builder.cue(mock["start_time"], mock["end_time"], mock["text"])
      end
    end

    expect(built_document).to eq(expected_document)
  end

  # Setting fields are written as "Name: value" lines directly below the "WEBVTT" line.
  it "correctly builds a vtt file with setting fields" do
    expected_document = [
      "WEBVTT",
      "Kind: captions",
      "Language: en",
      "",
      "00:00:01.000 --> 00:00:02.000",
      "Line 1",
      "",
      "00:00:02.000 --> 00:00:03.000",
      "Line 2",
      "",
      "",
    ].join('\n')

    header_fields = {
      "Kind"     => "captions",
      "Language" => "en",
    }

    built_document = WebVTT.build(header_fields) do |builder|
      MockLines.each do |mock|
        builder.cue(mock["start_time"], mock["end_time"], mock["text"])
      end
    end

    expect(built_document).to eq(expected_document)
  end
end
67 changes: 67 additions & 0 deletions src/invidious/helpers/webvtt.cr
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Namespace for logic relating to generating WebVTT files
#
# Probably not fully compliant with the WebVTT specification, but it is sufficient for Invidious.
module WebVTT
# A WebVTT builder generates WebVTT files
#
# Everything is appended to the `IO` given at construction; the builder keeps
# no other state.
private class Builder
  # Creates a builder that writes to *io*.
  def initialize(@io : IO)
  end

  # Writes a VTT cue with the specified time stamps and contents.
  #
  # Emits the `start --> end` timing line, then *text*, then a blank line
  # that terminates the cue.
  def cue(start_time : Time::Span, end_time : Time::Span, text : String)
    timestamp(start_time, end_time)
    @io << text
    @io << "\n\n"
  end

  # Writes the cue timing line (`HH:MM:SS.mmm --> HH:MM:SS.mmm`) followed by a newline.
  private def timestamp(start_time : Time::Span, end_time : Time::Span)
    timestamp_component(start_time)
    @io << " --> "
    timestamp_component(end_time)

    @io << '\n'
  end

  # Writes a single `HH:MM:SS.mmm` timestamp.
  #
  # The hours field is padded to at least two digits and grows beyond that
  # when needed.
  private def timestamp_component(timestamp : Time::Span)
    # `Time::Span#hours` is only the hour-of-day component (0..23) and silently
    # drops whole days, so spans of 24 hours or more would wrap around to "00".
    # Use the total number of whole hours instead.
    @io << timestamp.total_hours.to_i.to_s.rjust(2, '0')
    @io << ':' << timestamp.minutes.to_s.rjust(2, '0')
    @io << ':' << timestamp.seconds.to_s.rjust(2, '0')
    @io << '.' << timestamp.milliseconds.to_s.rjust(3, '0')
  end

  # Writes the file header and then yields so the caller can emit cues.
  #
  # The header is the `WEBVTT` line, one `Name: value` line per entry of
  # *setting_fields* (when given), and a blank line separating the header
  # from the cues.
  def document(setting_fields : Hash(String, String)? = nil, &)
    @io << "WEBVTT\n"

    if setting_fields
      setting_fields.each do |name, value|
        @io << name << ": " << value << '\n'
      end
    end

    @io << '\n'

    yield
  end
end

# Returns the resulting `String` of writing WebVTT to the yielded `WebVTT::Builder`
#
# ```
# string = WebVTT.build do |vtt|
# vtt.cue(Time::Span.new(seconds: 1), Time::Span.new(seconds: 2), "Line 1")
# vtt.cue(Time::Span.new(seconds: 2), Time::Span.new(seconds: 3), "Line 2")
# end
#
# string # => "WEBVTT\n\n00:00:01.000 --> 00:00:02.000\nLine 1\n\n00:00:02.000 --> 00:00:03.000\nLine 2\n\n"
# ```
#
# Accepts an optional hash of setting fields, which are written as header attributes of the resulting VTT file.
def self.build(setting_fields : Hash(String, String)? = nil, &)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The setting_fields here is mostly for backwards compatibility. I'm pretty sure — or at least I couldn't find any reference to it — that the fields Invidious adds aren't actually a part of WebVTT

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, it looks completely non-standard. I couldn't find anything about them on MDN

String.build do |str|
builder = Builder.new(str)
builder.document(setting_fields) do
yield builder
end
end
end
end
39 changes: 10 additions & 29 deletions src/invidious/routes/api/v1/videos.cr
Original file line number Diff line number Diff line change
Expand Up @@ -101,20 +101,17 @@ module Invidious::Routes::API::V1::Videos
if caption.name.includes? "auto-generated"
caption_xml = YT_POOL.client &.get(url).body

settings_field = {
"Kind" => "captions",
"Language" => "#{tlang || caption.language_code}",
}

if caption_xml.starts_with?("<?xml")
webvtt = caption.timedtext_to_vtt(caption_xml, tlang)
else
caption_xml = XML.parse(caption_xml)

webvtt = String.build do |str|
str << <<-END_VTT
WEBVTT
Kind: captions
Language: #{tlang || caption.language_code}


END_VTT

webvtt = WebVTT.build(settings_field) do |webvtt|
caption_nodes = caption_xml.xpath_nodes("//transcript/text")
caption_nodes.each_with_index do |node, i|
start_time = node["start"].to_f.seconds
Expand All @@ -127,22 +124,14 @@ module Invidious::Routes::API::V1::Videos
end_time = start_time + duration
end

start_time = "#{start_time.hours.to_s.rjust(2, '0')}:#{start_time.minutes.to_s.rjust(2, '0')}:#{start_time.seconds.to_s.rjust(2, '0')}.#{start_time.milliseconds.to_s.rjust(3, '0')}"
end_time = "#{end_time.hours.to_s.rjust(2, '0')}:#{end_time.minutes.to_s.rjust(2, '0')}:#{end_time.seconds.to_s.rjust(2, '0')}.#{end_time.milliseconds.to_s.rjust(3, '0')}"

text = HTML.unescape(node.content)
text = text.gsub(/<font color="#[a-fA-F0-9]{6}">/, "")
text = text.gsub(/<\/font>/, "")
if md = text.match(/(?<name>.*) : (?<text>.*)/)
text = "<v #{md["name"]}>#{md["text"]}</v>"
end

str << <<-END_CUE
#{start_time} --> #{end_time}
#{text}


END_CUE
webvtt.cue(start_time, end_time, text)
end
end
end
Expand Down Expand Up @@ -215,11 +204,7 @@ module Invidious::Routes::API::V1::Videos
storyboard = storyboard[0]
end

String.build do |str|
str << <<-END_VTT
WEBVTT
END_VTT

WebVTT.build do |vtt|
start_time = 0.milliseconds
end_time = storyboard[:interval].milliseconds

Expand All @@ -231,12 +216,8 @@ module Invidious::Routes::API::V1::Videos

storyboard[:storyboard_height].times do |j|
storyboard[:storyboard_width].times do |k|
str << <<-END_CUE
#{start_time}.000 --> #{end_time}.000
#{url}#xywh=#{storyboard[:width] * k},#{storyboard[:height] * j},#{storyboard[:width] - 2},#{storyboard[:height]}


END_CUE
current_cue_url = "#{url}#xywh=#{storyboard[:width] * k},#{storyboard[:height] * j},#{storyboard[:width] - 2},#{storyboard[:height]}"
vtt.cue(start_time, end_time, current_cue_url)

start_time += storyboard[:interval].milliseconds
end_time += storyboard[:interval].milliseconds
Expand Down
41 changes: 12 additions & 29 deletions src/invidious/videos/caption.cr
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,13 @@ module Invidious::Videos
break
end
end
result = String.build do |result|
result << <<-END_VTT
WEBVTT
Kind: captions
Language: #{tlang || @language_code}

settings_field = {
"Kind" => "captions",
"Language" => "#{tlang || @language_code}",
}

END_VTT

result << "\n\n"

result = WebVTT.build(settings_field) do |vtt|
cues.each_with_index do |node, i|
start_time = node["t"].to_f.milliseconds

Expand All @@ -76,29 +72,16 @@ module Invidious::Videos
end_time = start_time + duration
end

# start_time
result << start_time.hours.to_s.rjust(2, '0')
result << ':' << start_time.minutes.to_s.rjust(2, '0')
result << ':' << start_time.seconds.to_s.rjust(2, '0')
result << '.' << start_time.milliseconds.to_s.rjust(3, '0')

result << " --> "

# end_time
result << end_time.hours.to_s.rjust(2, '0')
result << ':' << end_time.minutes.to_s.rjust(2, '0')
result << ':' << end_time.seconds.to_s.rjust(2, '0')
result << '.' << end_time.milliseconds.to_s.rjust(3, '0')

result << "\n"

node.children.each do |s|
result << s.content
text = String.build do |io|
node.children.each do |s|
io << s.content
end
end
result << "\n"
result << "\n"

vtt.cue(start_time, end_time, text)
end
end

return result
end
end
Expand Down
40 changes: 7 additions & 33 deletions src/invidious/videos/transcript.cr
Original file line number Diff line number Diff line change
Expand Up @@ -34,41 +34,15 @@ module Invidious::Videos
# Convert into array of TranscriptLine
lines = self.parse(initial_data)

# Taken from Invidious::Videos::Captions::Metadata.timedtext_to_vtt()
vtt = String.build do |vtt|
vtt << <<-END_VTT
WEBVTT
Kind: captions
Language: #{target_language}


END_VTT

vtt << "\n\n"
settings_field = {
"Kind" => "captions",
"Language" => target_language,
}

# Taken from Invidious::Videos::Captions::Metadata.timedtext_to_vtt()
vtt = WebVTT.build(settings_field) do |vtt|
lines.each do |line|
start_time = line.start_ms
end_time = line.end_ms

# start_time
vtt << start_time.hours.to_s.rjust(2, '0')
vtt << ':' << start_time.minutes.to_s.rjust(2, '0')
vtt << ':' << start_time.seconds.to_s.rjust(2, '0')
vtt << '.' << start_time.milliseconds.to_s.rjust(3, '0')

vtt << " --> "

# end_time
vtt << end_time.hours.to_s.rjust(2, '0')
vtt << ':' << end_time.minutes.to_s.rjust(2, '0')
vtt << ':' << end_time.seconds.to_s.rjust(2, '0')
vtt << '.' << end_time.milliseconds.to_s.rjust(3, '0')

vtt << "\n"
vtt << line.line

vtt << "\n"
vtt << "\n"
vtt.cue(line.start_ms, line.end_ms, line.line)
end
end

Expand Down