From 4c99a7fb05a99b913275946b93dbde4d793db6ae Mon Sep 17 00:00:00 2001 From: quietvoid <39477805+quietvoid@users.noreply.github.com> Date: Tue, 8 Aug 2023 10:53:25 -0400 Subject: [PATCH] Implement support for encoding Dolby Vision from RPU file --- Cargo.lock | 80 +++++++++++++++++++++++++++++++++++++++++++-- Cargo.toml | 1 + src/api/internal.rs | 20 ++++++++---- src/api/util.rs | 23 +++++++++++++ src/bin/common.rs | 42 ++++++++++++++++++++++++ src/bin/rav1e.rs | 31 +++++++++++++++--- src/encoder.rs | 24 ++++++++------ 7 files changed, 199 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 22d96be173..2a58412960 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -83,9 +83,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.71" +version = "1.0.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8" +checksum = "3b13c32d80ecc7ab747b80c3784bce54ee8a7a0cc4fbda9bf4cda2cf6fe90854" [[package]] name = "aom-sys" @@ -230,6 +230,27 @@ version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "82704769cb85a22df2c54d6bdd6a158b7931d256cf3248a07d6ecbe9d58b31d7" +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + +[[package]] +name = "bitvec_helpers" +version = "3.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ef6883bd86b4112b56be19de3a1628de6c4063be7be6e641d484c83069efb4a" +dependencies = [ + "bitstream-io", +] + [[package]] name = "bstr" version = "1.6.0" @@ -443,6 +464,21 @@ dependencies = [ "windows-sys 0.45.0", ] +[[package]] +name = "crc" +version = "3.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86ec7a15cbe22e59248fc7eadb1907dab5ba09372595da4d73dd805ed4417dfe" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cace84e55f07e7301bae1c519df89cdad8cc3cd868413d3fdbdeca9ff3db484" + [[package]] name = "crc32fast" version = "1.3.2" @@ -583,6 +619,18 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" +[[package]] +name = "dolby_vision" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0131539224b32982a9c9cb8e6bde1ac30388bf8d6f38aa2c734daf6a8ea4aa1" +dependencies = [ + "anyhow", + "bitvec", + "bitvec_helpers", + "crc", +] + [[package]] name = "either" version = "1.8.1" @@ -669,6 +717,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + [[package]] name = "getrandom" version = "0.2.10" @@ -1246,6 +1300,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + [[package]] name = "rand" version = "0.8.5" @@ -1299,6 +1359,7 @@ dependencies = [ "criterion", "crossbeam", "dav1d-sys", + "dolby_vision", "fern", "image", "interpolate_name", @@ -1641,6 +1702,12 @@ dependencies = [ "version-compare", ] +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + [[package]] name = "target-lexicon" version = "0.12.8" @@ -2084,6 +2151,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + [[package]] name = "y4m" version = "0.8.0" diff --git a/Cargo.toml b/Cargo.toml index b871fb281c..2c28fddaaa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -108,6 +108,7 @@ new_debug_unreachable = "1.0.4" once_cell = "1.18.0" av1-grain = { version = "0.2.2", features = ["serialize"] } serde-big-array = { version = "0.5.1", optional = true } +dolby_vision = { version = "3.2.0" } [dependencies.image] version = "0.24.6" diff --git a/src/api/internal.rs b/src/api/internal.rs index 7066065cf6..50e0592efd 100644 --- a/src/api/internal.rs +++ b/src/api/internal.rs @@ -520,12 +520,6 @@ impl ContextInner { return Err(EncoderStatus::NeedMoreData); } - let t35_metadata = if let Some(t35) = self.t35_q.remove(&input_frameno) { - t35 - } else { - Box::new([]) - }; - if output_frameno_in_gop > 0 { let next_keyframe_input_frameno = self.next_keyframe_input_frameno( self.gop_input_frameno_start[&output_frameno], @@ -554,6 +548,13 @@ impl ContextInner { *self.gop_input_frameno_start.get_mut(&output_frameno).unwrap() = next_keyframe_input_frameno; } else { + let t35_metadata = if let Some(t35) = self.t35_q.get(&input_frameno) + { + t35.clone() + } else { + Box::new([]) + }; + let fi = FrameInvariants::new_inter_frame( self.get_previous_coded_fi(output_frameno), &self.inter_cfg, @@ -576,6 +577,12 @@ impl ContextInner { } } + let t35_metadata = if let Some(t35) = self.t35_q.get(&input_frameno) { + t35.clone() + } else { + Box::new([]) + }; + // Now that we know the input_frameno, look up the correct frame type let frame_type = if self.keyframes.contains(&input_frameno) { FrameType::KEY @@ -1516,6 +1523,7 @@ impl ContextInner { if let Ok(ref mut pkt) = ret { self.garbage_collect(pkt.input_frameno); pkt.opaque = self.opaque_q.remove(&pkt.input_frameno); + self.t35_q.remove(&pkt.input_frameno); } ret diff --git a/src/api/util.rs b/src/api/util.rs index a2ab9794e6..1fee77506c 100644 --- a/src/api/util.rs +++ b/src/api/util.rs @@ -8,6 +8,7 @@ // PATENTS file, you can obtain it at www.aomedia.org/license/patent. #![deny(missing_docs)] +use crate::encoder::FrameInvariants; use crate::frame::*; use crate::serialize::{Deserialize, Serialize}; use crate::stats::EncoderStats; @@ -137,6 +138,12 @@ impl fmt::Display for FrameType { } } +/// Dolby Vision T.35 metadata payload expected prefix. +pub const T35_DOVI_PAYLOAD_PREFIX: &[u8] = &[ + 0x00, 0x03B, // Dolby + 0x00, 0x00, 0x08, 0x00, 0x37, 0xCD, 0x08, +]; + /// A single T.35 metadata packet. #[derive(Clone, Debug, Default)] pub struct T35 { @@ -299,3 +306,19 @@ impl IntoFrame for (Frame, Option) { (Some(Arc::new(self.0)), self.1) } } + +impl T35 { + /// Whether the T.35 metadata is Dolby Vision Metadata. + pub fn is_dovi_metadata(&self) -> bool { + self.country_code == 0xB5 && self.data.starts_with(T35_DOVI_PAYLOAD_PREFIX) + } + + /// Returns true if the T35 metadata can be added to the frame + pub fn is_valid_placement(&self, fi: &FrameInvariants) -> bool { + if self.is_dovi_metadata() { + return fi.show_frame || fi.is_show_existing_frame(); + } + + true + } +} diff --git a/src/bin/common.rs b/src/bin/common.rs index 49f525b430..caf9a1453e 100644 --- a/src/bin/common.rs +++ b/src/bin/common.rs @@ -18,6 +18,8 @@ use rav1e::prelude::*; use scan_fmt::scan_fmt; use rav1e::config::CpuFeatureLevel; + +use std::collections::BTreeMap; use std::fs::File; use std::io; use std::io::prelude::*; @@ -195,6 +197,15 @@ pub struct CliOptions { help_heading = "ENCODE SETTINGS" )] pub film_grain_table: Option, + /// Uses a Dolby Vision RPU file to add as T.35 metadata to the encode. + /// The RPU must be in the same format as for x265 + #[clap( + long, + alias = "dolby-vision-rpu", + value_parser, + help_heading = "ENCODE SETTINGS" + )] + pub dovi_rpu: Option, /// Pixel range #[clap(long, value_parser, help_heading = "VIDEO METADATA")] @@ -339,6 +350,7 @@ pub struct ParsedCliOptions { pub photon_noise: u8, #[cfg(feature = "unstable")] pub slots: usize, + pub dovi_payloads: Option>, } #[cfg(feature = "serialize")] @@ -466,6 +478,35 @@ pub fn parse_cli() -> Result { panic!("A limit cannot be set above 1 in still picture mode"); } + let dovi_payloads = if let Some(rpu_file) = matches.dovi_rpu.as_ref() { + let rpus = dolby_vision::rpu::utils::parse_rpu_file(rpu_file) + .expect("Failed to read Dolby Vision RPU file"); + + let payloads: BTreeMap = rpus + .iter() + .filter_map(|rpu| { + rpu + .write_av1_rpu_metadata_obu_t35_payload() + .map(|payload| T35 { + country_code: 0xB5, + country_code_extension_byte: 0x00, + data: payload.into_boxed_slice(), + }) + .ok() + }) + .zip(0u64..) + .map(|(payload, frame_no)| (frame_no, payload)) + .collect(); + + if !payloads.is_empty() { + Some(payloads) + } else { + None + } + } else { + None + }; + #[cfg(feature = "unstable")] let slots = matches.slots; @@ -484,6 +525,7 @@ pub fn parse_cli() -> Result { pass2file_name: matches.second_pass.clone(), save_config: save_config_path, photon_noise: matches.photon_noise, + dovi_payloads, #[cfg(feature = "unstable")] slots, }) diff --git a/src/bin/rav1e.rs b/src/bin/rav1e.rs index a5a53c9e85..33ecd05f14 100644 --- a/src/bin/rav1e.rs +++ b/src/bin/rav1e.rs @@ -101,6 +101,8 @@ use rust_hawktracer::*; use crate::decoder::{Decoder, FrameBuilder, VideoDetails}; use crate::muxer::*; + +use std::collections::BTreeMap; use std::fs::File; use std::io::{Read, Seek, Write}; use std::process::exit; @@ -151,6 +153,7 @@ impl Source { #[hawktracer(Source_read_frame)] fn read_frame( &mut self, ctx: &mut Context, video_info: VideoDetails, + fp: Option, ) -> Result<(), CliError> { if self.limit != 0 && self.count == self.limit { ctx.flush(); @@ -172,7 +175,7 @@ impl Source { _ => return Err(CliError::new("Unsupported bit depth")), } self.count += 1; - let _ = ctx.send_frame(Some(Arc::new(frame))); + let _ = ctx.send_frame((Arc::new(frame), fp)); } _ => { ctx.flush(); @@ -189,7 +192,7 @@ fn process_frame( ctx: &mut Context, output_file: &mut dyn Muxer, source: &mut Source, pass1file: Option<&mut File>, pass2file: Option<&mut File>, mut y4m_enc: Option<&mut y4m::Encoder>>, - metrics_cli: MetricsEnabled, + metrics_cli: MetricsEnabled, dovi_payloads: Option<&BTreeMap>, ) -> Result>, CliError> { let y4m_details = source.input.get_video_details(); let mut frame_summaries = Vec::new(); @@ -237,7 +240,24 @@ fn process_frame( Ok((Some(frame_summaries), true)) } Err(EncoderStatus::NeedMoreData) => { - source.read_frame(ctx, y4m_details)?; + let mut fp = None; + let mut t35_metadata: Option> = None; + + if let Some(payloads) = dovi_payloads { + if let Some(payload) = payloads.get(&(source.count as u64)) { + let t35_list = t35_metadata.get_or_insert_with(Vec::new); + t35_list.push(payload.clone()); + } + } + + if let Some(t35_metadata) = t35_metadata { + fp = Some(FrameParameters { + t35_metadata: t35_metadata.into_boxed_slice(), + ..Default::default() + }); + } + + source.read_frame(ctx, y4m_details, fp)?; Ok((Some(frame_summaries), false)) } Err(EncoderStatus::EnoughData) => { @@ -296,7 +316,7 @@ fn do_encode( output: &mut dyn Muxer, mut source: Source, mut pass1file: Option, mut pass2file: Option, mut y4m_enc: Option>>, - metrics_enabled: MetricsEnabled, + metrics_enabled: MetricsEnabled, dovi_payloads: Option<&BTreeMap>, ) -> Result<(), CliError> { let mut ctx: Context = cfg.new_context().map_err(|e| e.context("Invalid encoder settings"))?; @@ -323,6 +343,7 @@ fn do_encode( pass2file.as_mut(), y4m_enc.as_mut(), metrics_enabled, + dovi_payloads, )? { if verbose != Verboseness::Quiet { for frame in frame_info { @@ -632,6 +653,7 @@ fn run() -> Result<(), error::CliError> { pass2file, y4m_enc, cli.metrics_enabled, + cli.dovi_payloads.as_ref(), )? } else { do_encode::>>( @@ -644,6 +666,7 @@ fn run() -> Result<(), error::CliError> { pass2file, y4m_enc, cli.metrics_enabled, + cli.dovi_payloads.as_ref(), )? } if cli.benchmark { diff --git a/src/encoder.rs b/src/encoder.rs index 8eac5c287b..53aefcbaf2 100644 --- a/src/encoder.rs +++ b/src/encoder.rs @@ -3686,11 +3686,9 @@ pub fn encode_show_existing_frame( } for t35 in fi.t35_metadata.iter() { - let mut t35_buf = Vec::new(); - let mut t35_bw = BitWriter::endian(&mut t35_buf, BigEndian); - t35_bw.write_t35_metadata_obu(t35).unwrap(); - packet.write_all(&t35_buf).unwrap(); - t35_buf.clear(); + if t35.is_valid_placement(fi) { + write_t35_metadata_packet(&mut packet, t35); + } } let mut buf1 = Vec::new(); @@ -3767,11 +3765,9 @@ pub fn encode_frame( } for t35 in fi.t35_metadata.iter() { - let mut t35_buf = Vec::new(); - let mut t35_bw = BitWriter::endian(&mut t35_buf, BigEndian); - t35_bw.write_t35_metadata_obu(t35).unwrap(); - packet.write_all(&t35_buf).unwrap(); - t35_buf.clear(); + if t35.is_valid_placement(fi) { + write_t35_metadata_packet(&mut packet, t35); + } } let mut buf1 = Vec::new(); @@ -3827,6 +3823,14 @@ pub fn update_rec_buffer( } } +fn write_t35_metadata_packet(packet: &mut Vec, t35: &T35) { + let mut t35_buf = Vec::new(); + let mut t35_bw = BitWriter::endian(&mut t35_buf, BigEndian); + t35_bw.write_t35_metadata_obu(t35).unwrap(); + packet.write_all(&t35_buf).unwrap(); + t35_buf.clear(); +} + #[cfg(test)] mod test { use super::*;