
Commit dee900d

format: format with clang format
1 parent 0214a1a commit dee900d

15 files changed, +672 -411 lines changed

ios/RnExecutorch/OCR.mm (+80 -47)

@@ -1,10 +1,10 @@
-#import <ExecutorchLib/ETModel.h>
-#import <React/RCTBridgeModule.h>
 #import "OCR.h"
-#import "utils/Fetcher.h"
-#import "utils/ImageProcessor.h"
 #import "models/ocr/Detector.h"
 #import "models/ocr/RecognitionHandler.h"
+#import "utils/Fetcher.h"
+#import "utils/ImageProcessor.h"
+#import <ExecutorchLib/ETModel.h>
+#import <React/RCTBridgeModule.h>
 
 @implementation OCR {
   Detector *detector;
@@ -14,70 +14,103 @@ @implementation OCR {
 RCT_EXPORT_MODULE()
 
 - (void)loadModule:(NSString *)detectorSource
-    recognizerSourceLarge:(NSString *)recognizerSourceLarge
-    recognizerSourceMedium:(NSString *)recognizerSourceMedium
-    recognizerSourceSmall:(NSString *)recognizerSourceSmall
-    symbols:(NSString *)symbols
-    languageDictPath:(NSString *)languageDictPath
-    resolve:(RCTPromiseResolveBlock)resolve
-    reject:(RCTPromiseRejectBlock)reject {
+     recognizerSourceLarge:(NSString *)recognizerSourceLarge
+    recognizerSourceMedium:(NSString *)recognizerSourceMedium
+     recognizerSourceSmall:(NSString *)recognizerSourceSmall
+                   symbols:(NSString *)symbols
+          languageDictPath:(NSString *)languageDictPath
+                   resolve:(RCTPromiseResolveBlock)resolve
+                    reject:(RCTPromiseRejectBlock)reject {
   detector = [[Detector alloc] init];
-  [detector loadModel:[NSURL URLWithString:detectorSource] completion:^(BOOL success, NSNumber *errorCode) {
-    if (!success) {
-      NSError *error = [NSError errorWithDomain:@"OCRErrorDomain"
-                                           code:[errorCode intValue]
-                                       userInfo:@{NSLocalizedDescriptionKey: [NSString stringWithFormat:@"%ld", (long)[errorCode longValue]]}];
-      reject(@"init_module_error", @"Failed to initialize detector module", error);
-      return;
-    }
-    [Fetcher fetchResource:[NSURL URLWithString:languageDictPath] resourceType:ResourceType::TXT completionHandler:^(NSString *filePath, NSError *error) {
-      if (error) {
-        reject(@"init_module_error", @"Failed to initialize converter module", error);
-        return;
-      }
-
-      self->recognitionHandler = [[RecognitionHandler alloc] initWithSymbols:symbols languageDictPath:filePath];
-      [self->recognitionHandler loadRecognizers:recognizerSourceLarge mediumRecognizerPath:recognizerSourceMedium smallRecognizerPath:recognizerSourceSmall completion:^(BOOL allModelsLoaded, NSNumber *errorCode) {
-        if (allModelsLoaded) {
-          resolve(@(YES));
-        } else {
-          NSError *error = [NSError errorWithDomain:@"OCRErrorDomain"
-                                               code:[errorCode intValue]
-                                           userInfo:@{NSLocalizedDescriptionKey: [NSString stringWithFormat:@"%ld", (long)[errorCode longValue]]}];
-          reject(@"init_recognizer_error", @"Failed to initialize one or more recognizer models", error);
+  [detector
+      loadModel:[NSURL URLWithString:detectorSource]
+     completion:^(BOOL success, NSNumber *errorCode) {
+       if (!success) {
+         NSError *error = [NSError
+             errorWithDomain:@"OCRErrorDomain"
+                        code:[errorCode intValue]
+                    userInfo:@{
+                      NSLocalizedDescriptionKey : [NSString
+                          stringWithFormat:@"%ld", (long)[errorCode longValue]]
+                    }];
+         reject(@"init_module_error", @"Failed to initialize detector module",
+                error);
+         return;
       }
+       [Fetcher fetchResource:[NSURL URLWithString:languageDictPath]
+                 resourceType:ResourceType::TXT
+            completionHandler:^(NSString *filePath, NSError *error) {
+              if (error) {
+                reject(@"init_module_error",
+                       @"Failed to initialize converter module", error);
+                return;
+              }
+
+              self->recognitionHandler =
+                  [[RecognitionHandler alloc] initWithSymbols:symbols
+                                             languageDictPath:filePath];
+              [self->recognitionHandler
+                   loadRecognizers:recognizerSourceLarge
+              mediumRecognizerPath:recognizerSourceMedium
+               smallRecognizerPath:recognizerSourceSmall
+                        completion:^(BOOL allModelsLoaded,
+                                     NSNumber *errorCode) {
+                          if (allModelsLoaded) {
+                            resolve(@(YES));
+                          } else {
+                            NSError *error = [NSError
+                                errorWithDomain:@"OCRErrorDomain"
+                                           code:[errorCode intValue]
+                                       userInfo:@{
+                                         NSLocalizedDescriptionKey :
+                                             [NSString stringWithFormat:
+                                                           @"%ld",
+                                                           (long)[errorCode
+                                                                     longValue]]
+                                       }];
+                            reject(@"init_recognizer_error",
+                                   @"Failed to initialize one or more "
+                                   @"recognizer models",
+                                   error);
+                          }
+                        }];
+            }];
 }];
-  }];
-  }];
 }
 
 - (void)forward:(NSString *)input
        resolve:(RCTPromiseResolveBlock)resolve
         reject:(RCTPromiseRejectBlock)reject {
   /*
   The OCR consists of two phases:
-  1. Detection - detecting text regions in the image, the result of this phase is a list of bounding boxes.
-  2. Recognition - recognizing the text in the bounding boxes, the result is a list of strings and corresponding confidence scores.
-
-  Recognition uses three models, each model is responsible for recognizing text of different sizes (e.g. large - 512x64, medium - 256x64, small - 128x64).
+  1. Detection - detecting text regions in the image, the result of this phase
+  is a list of bounding boxes.
+  2. Recognition - recognizing the text in the bounding boxes, the result is a
+  list of strings and corresponding confidence scores.
+
+  Recognition uses three models, each model is responsible for recognizing text
+  of different sizes (e.g. large - 512x64, medium - 256x64, small - 128x64).
   */
   @try {
     cv::Mat image = [ImageProcessor readImage:input];
-    NSArray* result = [detector runModel:image];
+    NSArray *result = [detector runModel:image];
     cv::Size detectorSize = [detector getModelImageSize];
     cv::cvtColor(image, image, cv::COLOR_BGR2GRAY);
-    result = [self->recognitionHandler recognize:result imgGray:image desiredWidth:detectorSize.width * recognizerRatio desiredHeight:detectorSize.height * recognizerRatio];
+    result = [self->recognitionHandler
+           recognize:result
+             imgGray:image
+        desiredWidth:detectorSize.width * recognizerRatio
+       desiredHeight:detectorSize.height * recognizerRatio];
     resolve(result);
   } @catch (NSException *exception) {
-    reject(@"forward_error", [NSString stringWithFormat:@"%@", exception.reason],
-           nil);
+    reject(@"forward_error",
+           [NSString stringWithFormat:@"%@", exception.reason], nil);
   }
 }
 
 - (std::shared_ptr<facebook::react::TurboModule>)getTurboModule:
-    (const facebook::react::ObjCTurboModule::InitParams &)params {
-  return std::make_shared<facebook::react::NativeOCRSpecJSI>(
-      params);
+    (const facebook::react::ObjCTurboModule::InitParams &)params {
+  return std::make_shared<facebook::react::NativeOCRSpecJSI>(params);
 }
 
 @end
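The forward: comment above describes the two-phase pipeline, but this diff does not show how a detected box is routed to the large, medium, or small recognizer. As a hedged sketch only, with a hypothetical function name and width cutoffs inferred from the 512/256/128 input widths named in the comment:

#import <Foundation/Foundation.h>

// Hypothetical dispatch (not code from this repository): pick a recognizer
// for a text crop by width, assuming crops are normalized to 64 px height.
static NSString *recognizerForCropWidth(int cropWidth) {
  if (cropWidth > 256) {
    return @"large"; // handled by the 512x64 model
  }
  if (cropWidth > 128) {
    return @"medium"; // handled by the 256x64 model
  }
  return @"small"; // handled by the 128x64 model
}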

ios/RnExecutorch/models/ocr/Detector.h (+1 -1)

@@ -1,6 +1,6 @@
-#import "opencv2/opencv.hpp"
 #import "BaseModel.h"
 #import "RecognitionHandler.h"
+#import "opencv2/opencv.hpp"
 
 constexpr CGFloat textThreshold = 0.4;
 constexpr CGFloat linkThreshold = 0.4;
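The thresholds declared here feed the detector's postprocessing in Detector.mm below. A minimal sketch of how textThreshold could binarize the character score map, assuming plain OpenCV thresholding; the real DetectorUtils pipeline also uses the affinity map and connected-component grouping:

#import "opencv2/opencv.hpp"

// Hedged sketch: mark pixels of the floating-point score map that score
// above the threshold (0.4 here) as text candidates.
static cv::Mat binarizeScoreMap(const cv::Mat &scoreText, double threshold) {
  cv::Mat binaryText;
  cv::threshold(scoreText, binaryText, threshold, 1.0, cv::THRESH_BINARY);
  return binaryText;
}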

ios/RnExecutorch/models/ocr/Detector.mm (+42 -24)

@@ -4,72 +4,90 @@
 #import "utils/OCRUtils.h"
 
 /*
-The model used as detector is based on the CRAFT (Character Region Awareness for Text Detection) paper.
-https://arxiv.org/pdf/1904.01941
+The model used as detector is based on the CRAFT (Character Region Awareness
+for Text Detection) paper. https://arxiv.org/pdf/1904.01941
 */
 
 @implementation Detector {
   cv::Size originalSize;
   cv::Size modelSize;
 }
 
-- (cv::Size)getModelImageSize{
-  if(!modelSize.empty()) {
+- (cv::Size)getModelImageSize {
+  if (!modelSize.empty()) {
     return modelSize;
   }
-
-  NSArray *inputShape = [module getInputShape: @0];
+
+  NSArray *inputShape = [module getInputShape:@0];
   NSNumber *widthNumber = inputShape.lastObject;
   NSNumber *heightNumber = inputShape[inputShape.count - 2];
-
+
   const int height = [heightNumber intValue];
   const int width = [widthNumber intValue];
   modelSize = cv::Size(height, width);
-
+
   return cv::Size(height, width);
 }
 
 - (NSArray *)preprocess:(cv::Mat &)input {
   /*
   Detector as an input accepts a tensor with a shape of [1, 3, 800, 800].
-  Due to the big influence of resizing on recognition quality, the image preserves
-  the original aspect ratio and the missing parts are filled with padding.
+  Due to the big influence of resizing on recognition quality, the image
+  preserves the original aspect ratio and the missing parts are filled with
+  padding.
   */
   self->originalSize = cv::Size(input.cols, input.rows);
-
+
   cv::Size modelImageSize = [self getModelImageSize];
   cv::Mat resizedImage;
-  resizedImage = [OCRUtils resizeWithPadding:input desiredWidth:modelImageSize.width desiredHeight:modelImageSize.height];
-  NSArray *modelInput = [ImageProcessor matToNSArray: resizedImage mean:mean variance:variance];
+  resizedImage = [OCRUtils resizeWithPadding:input
+                                desiredWidth:modelImageSize.width
+                               desiredHeight:modelImageSize.height];
+  NSArray *modelInput = [ImageProcessor matToNSArray:resizedImage
+                                                mean:mean
+                                            variance:variance];
   return modelInput;
 }
 
 - (NSArray *)postprocess:(NSArray *)output {
   /*
   The output of the model consists of two matrices (heat maps):
   1. ScoreText(Score map) - The probability of a region containing a character
-  2. ScoreAffinity(Affinity map) - affinity between characters, used to group each character into a single instance (sequence)
-  Both matrices are 400x400
-
+  2. ScoreAffinity(Affinity map) - affinity between characters, used to
+  group each character into a single instance (sequence). Both matrices are
+  400x400.
+
   The result of this step is a list of bounding boxes that contain text.
   */
   NSArray *predictions = [output objectAtIndex:0];
-
+
   cv::Size modelImageSize = [self getModelImageSize];
   cv::Mat scoreTextCV, scoreAffinityCV;
   /*
-  The output of the model is a matrix the size of the input image containing two matrices representing heatmaps.
-  Those two matrices are half the size of the input image, that's why the width and height are divided by 2.
+  The output of the model is a matrix the size of the input image containing
+  two matrices representing heatmaps. Those two matrices are half the size of
+  the input image, that's why the width and height are divided by 2.
   */
   [DetectorUtils interleavedArrayToMats:predictions
                              outputMat1:scoreTextCV
                              outputMat2:scoreAffinityCV
-                               withSize:cv::Size(modelImageSize.width / 2, modelImageSize.height / 2)];
-  NSArray* bBoxesList = [DetectorUtils getDetBoxesFromTextMap:scoreTextCV affinityMap:scoreAffinityCV usingTextThreshold:textThreshold linkThreshold:linkThreshold lowTextThreshold:lowTextThreshold];
+                               withSize:cv::Size(modelImageSize.width / 2,
+                                                 modelImageSize.height / 2)];
+  NSArray *bBoxesList = [DetectorUtils getDetBoxesFromTextMap:scoreTextCV
+                                                  affinityMap:scoreAffinityCV
+                                           usingTextThreshold:textThreshold
+                                                linkThreshold:linkThreshold
+                                             lowTextThreshold:lowTextThreshold];
   NSLog(@"Detected boxes: %lu", (unsigned long)bBoxesList.count);
-  bBoxesList = [DetectorUtils restoreBboxRatio:bBoxesList usingRestoreRatio: restoreRatio];
-  bBoxesList = [DetectorUtils groupTextBoxes:bBoxesList centerThreshold:centerThreshold distanceThreshold:distanceThreshold heightThreshold:heightThreshold minSideThreshold:minSideThreshold maxSideThreshold:maxSideThreshold maxWidth:maxWidth];
-
+  bBoxesList = [DetectorUtils restoreBboxRatio:bBoxesList
+                             usingRestoreRatio:restoreRatio];
+  bBoxesList = [DetectorUtils groupTextBoxes:bBoxesList
+                             centerThreshold:centerThreshold
+                           distanceThreshold:distanceThreshold
+                             heightThreshold:heightThreshold
+                            minSideThreshold:minSideThreshold
+                            maxSideThreshold:maxSideThreshold
+                                    maxWidth:maxWidth];
+
   return bBoxesList;
 }
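The preprocess comment in this file says the image keeps its original aspect ratio and the remainder is filled with padding. A minimal sketch of that idea, assuming black padding added on the right and bottom; the actual [OCRUtils resizeWithPadding:desiredWidth:desiredHeight:] may place or fill the padding differently:

#import "opencv2/opencv.hpp"
#include <algorithm>
#include <cmath>

// Hedged sketch: scale the image to fit inside the target size without
// distortion, then pad the remaining area with black pixels.
static cv::Mat resizeKeepingAspect(const cv::Mat &input, int desiredWidth,
                                   int desiredHeight) {
  const double scale = std::min(desiredWidth / (double)input.cols,
                                desiredHeight / (double)input.rows);
  cv::Mat resized;
  cv::resize(input, resized,
             cv::Size((int)std::round(input.cols * scale),
                      (int)std::round(input.rows * scale)));
  cv::Mat padded;
  cv::copyMakeBorder(resized, padded, /*top=*/0,
                     /*bottom=*/desiredHeight - resized.rows, /*left=*/0,
                     /*right=*/desiredWidth - resized.cols, cv::BORDER_CONSTANT,
                     cv::Scalar(0, 0, 0));
  return padded;
}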
ios/RnExecutorch/models/ocr/RecognitionHandler.h (+10 -3)

@@ -9,8 +9,15 @@ constexpr CGFloat adjustContrast = 0.2;
 
 @interface RecognitionHandler : NSObject
 
-- (instancetype)initWithSymbols:(NSString *)symbols languageDictPath:(NSString *)languageDictPath;
-- (void)loadRecognizers:(NSString *)largeRecognizerPath mediumRecognizerPath:(NSString *)mediumRecognizerPath smallRecognizerPath:(NSString *)smallRecognizerPath completion:(void (^)(BOOL, NSNumber *))completion;
-- (NSArray *)recognize:(NSArray<NSDictionary *> *)bBoxesList imgGray:(cv::Mat)imgGray desiredWidth:(int)desiredWidth desiredHeight:(int)desiredHeight;
+- (instancetype)initWithSymbols:(NSString *)symbols
+               languageDictPath:(NSString *)languageDictPath;
+- (void)loadRecognizers:(NSString *)largeRecognizerPath
+    mediumRecognizerPath:(NSString *)mediumRecognizerPath
+     smallRecognizerPath:(NSString *)smallRecognizerPath
+              completion:(void (^)(BOOL, NSNumber *))completion;
+- (NSArray *)recognize:(NSArray<NSDictionary *> *)bBoxesList
+               imgGray:(cv::Mat)imgGray
+          desiredWidth:(int)desiredWidth
+         desiredHeight:(int)desiredHeight;
 
 @end
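Given the interface above, a caller loads the three recognizers and waits for the completion block, much as OCR.mm does. A hedged usage sketch; the symbol string and file paths are placeholders, not resources shipped with the library:

RecognitionHandler *handler = [[RecognitionHandler alloc]
    initWithSymbols:@"0123456789abcdefghijklmnopqrstuvwxyz" // placeholder set
   languageDictPath:@"/tmp/lang_dict.txt"];                 // placeholder path
[handler loadRecognizers:@"/tmp/recognizer_large.pte" // placeholder paths
    mediumRecognizerPath:@"/tmp/recognizer_medium.pte"
     smallRecognizerPath:@"/tmp/recognizer_small.pte"
              completion:^(BOOL allModelsLoaded, NSNumber *errorCode) {
                // Surface the result to JS here, as OCR.mm does with
                // resolve/reject.
              }];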
