bnosac · jwijffels · Oct 2, 2024 · Oct 2, 2024 · Oct 2, 2024 · Oct 2, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: audio.whisper
 Type: Package
 Title: Transcribe Audio Files using the "Whisper" Automatic Speech Recognition Model
-Version: 0.4.1
+Version: 0.5.0
 Maintainer: Jan Wijffels <[email protected]>
 Authors@R: c(
     person('Jan', 'Wijffels', role = c('aut', 'cre', 'cph'), email = '[email protected]', comment = "R wrapper"), 
@@ -29,6 +29,5 @@ Suggests:
     audio.vadwebrtc (>= 0.2.0)
 LinkingTo: Rcpp
 SystemRequirements: GNU make
-RoxygenNote: 7.1.2
+RoxygenNote: 7.3.2
 Remotes: bnosac/audio.vadwebrtc
-
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,8 @@
+## CHANGES IN audio.whisper VERSION 0.5.0
+
+- Upgrade to whisper.cpp version v1.7.0
+- Added argument flash_attn to function whisper which allows to optionally enable flash attention (defaults to FALSE)
+
 ## CHANGES IN audio.whisper VERSION 0.4.1
 
 - Added function predict.whisper_transcription which allows to assign a transcription segment to either a left/right channel based on a Voice Activity Detection

diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -1,8 +1,8 @@
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
-whisper_load_model <- function(model, use_gpu = FALSE) {
-    .Call('_audio_whisper_whisper_load_model', PACKAGE = 'audio.whisper', model, use_gpu)
+whisper_load_model <- function(model, use_gpu = FALSE, flash_attn = FALSE) {
+    .Call('_audio_whisper_whisper_load_model', PACKAGE = 'audio.whisper', model, use_gpu, flash_attn)
 }
 
 whisper_encode <- function(model, path, language, token_timestamps = FALSE, translate = FALSE, duration = 0L, offset = 0L, trace = 1L, n_threads = 1L, n_processors = 1L, entropy_thold = 2.40, logprob_thold = -1.00, beam_size = -1L, best_of = 5L, split_on_word = FALSE, max_context = -1L, prompt = "", print_special = FALSE, diarize = FALSE, diarize_percent = 1.1) {

diff --git a/R/whisper.R b/R/whisper.R
@@ -167,6 +167,7 @@ align_skipped <- function(sentences, skipped, from = "from", to = "to"){
 #' @param x the path to a model, an object returned by \code{\link{whisper_download_model}} or a character string with 
 #' the name of the model which can be passed on to \code{\link{whisper_download_model}}
 #' @param use_gpu logical indicating to use the GPU in case you have Metal or an NVIDIA GPU. Defaults to \code{FALSE}.
+#' @param flash_attn logical indicating whether to use flash attention when loading the model. Defaults to \code{FALSE}.
 #' @param overwrite logical indicating to overwrite the model file if the model file was already downloaded, passed on to \code{\link{whisper_download_model}}. Defaults to \code{FALSE}.
 #' @param model_dir a path where the model will be downloaded to, passed on to \code{\link{whisper_download_model}}. 
 #' Defaults to the environment variable \code{WHISPER_MODEL_DIR} and if this is not set, the current working directory
@@ -236,7 +237,7 @@ align_skipped <- function(sentences, skipped, from = "from", to = "to"){
 #' trans <- predict(model, newdata = system.file(package = "audio.whisper", "samples", "jfk.wav"), 
 #'                  language = "en", duration = 1000)
 #' }
-whisper <- function(x, use_gpu = FALSE, overwrite = FALSE, model_dir = Sys.getenv("WHISPER_MODEL_DIR", unset = getwd()), ...){
+whisper <- function(x, use_gpu = FALSE, flash_attn = FALSE, overwrite = FALSE, model_dir = Sys.getenv("WHISPER_MODEL_DIR", unset = getwd()), ...){
   if(x %in% c("tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3", "large",
               "tiny-q5_1", "tiny.en-q5_1", 
               "base-q5_1", "base.en-q5_1", 
@@ -251,7 +252,7 @@ whisper <- function(x, use_gpu = FALSE, overwrite = FALSE, model_dir = Sys.geten
     out        <- list(file = x)  
   }
   Sys.setenv("GGML_METAL_PATH_RESOURCES" = Sys.getenv("GGML_METAL_PATH_RESOURCES", unset = system.file(package = "audio.whisper", "metal")))
-  out$model <- whisper_load_model(out$file, use_gpu = use_gpu, ...)
+  out$model <- whisper_load_model(out$file, use_gpu = use_gpu, flash_attn = flash_attn, ...)
   class(out) <- "whisper"
   out
 }

diff --git a/man/whisper.Rd b/man/whisper.Rd
diff --git a/man/whisper_download_model.Rd b/man/whisper_download_model.Rd