
Commit dee900d

format: format with clang format
1 parent 0214a1a commit dee900d

15 files changed, +672 -411 lines changed

ios/RnExecutorch/OCR.mm (+80 -47)

@@ -1,10 +1,10 @@
-#import <ExecutorchLib/ETModel.h>
-#import <React/RCTBridgeModule.h>
 #import "OCR.h"
-#import "utils/Fetcher.h"
-#import "utils/ImageProcessor.h"
 #import "models/ocr/Detector.h"
 #import "models/ocr/RecognitionHandler.h"
+#import "utils/Fetcher.h"
+#import "utils/ImageProcessor.h"
+#import <ExecutorchLib/ETModel.h>
+#import <React/RCTBridgeModule.h>
 
 @implementation OCR {
   Detector *detector;
@@ -14,70 +14,103 @@ @implementation OCR {
 RCT_EXPORT_MODULE()
 
 - (void)loadModule:(NSString *)detectorSource
-    recognizerSourceLarge:(NSString *)recognizerSourceLarge
-    recognizerSourceMedium:(NSString *)recognizerSourceMedium
-    recognizerSourceSmall:(NSString *)recognizerSourceSmall
-    symbols:(NSString *)symbols
-    languageDictPath:(NSString *)languageDictPath
-    resolve:(RCTPromiseResolveBlock)resolve
-    reject:(RCTPromiseRejectBlock)reject {
+     recognizerSourceLarge:(NSString *)recognizerSourceLarge
+    recognizerSourceMedium:(NSString *)recognizerSourceMedium
+     recognizerSourceSmall:(NSString *)recognizerSourceSmall
+                   symbols:(NSString *)symbols
+          languageDictPath:(NSString *)languageDictPath
+                   resolve:(RCTPromiseResolveBlock)resolve
+                    reject:(RCTPromiseRejectBlock)reject {
   detector = [[Detector alloc] init];
-  [detector loadModel:[NSURL URLWithString:detectorSource] completion:^(BOOL success, NSNumber *errorCode) {
-    if (!success) {
-      NSError *error = [NSError errorWithDomain:@"OCRErrorDomain"
-                                           code:[errorCode intValue]
-                                       userInfo:@{NSLocalizedDescriptionKey: [NSString stringWithFormat:@"%ld", (long)[errorCode longValue]]}];
-      reject(@"init_module_error", @"Failed to initialize detector module", error);
-      return;
-    }
-    [Fetcher fetchResource:[NSURL URLWithString:languageDictPath] resourceType:ResourceType::TXT completionHandler:^(NSString *filePath, NSError *error) {
-      if (error) {
-        reject(@"init_module_error", @"Failed to initialize converter module", error);
-        return;
-      }
-
-      self->recognitionHandler = [[RecognitionHandler alloc] initWithSymbols:symbols languageDictPath:filePath];
-      [self->recognitionHandler loadRecognizers:recognizerSourceLarge mediumRecognizerPath:recognizerSourceMedium smallRecognizerPath:recognizerSourceSmall completion:^(BOOL allModelsLoaded, NSNumber *errorCode) {
-        if (allModelsLoaded) {
-          resolve(@(YES));
-        } else {
-          NSError *error = [NSError errorWithDomain:@"OCRErrorDomain"
-                                               code:[errorCode intValue]
-                                           userInfo:@{NSLocalizedDescriptionKey: [NSString stringWithFormat:@"%ld", (long)[errorCode longValue]]}];
-          reject(@"init_recognizer_error", @"Failed to initialize one or more recognizer models", error);
+  [detector
+      loadModel:[NSURL URLWithString:detectorSource]
+     completion:^(BOOL success, NSNumber *errorCode) {
+       if (!success) {
+         NSError *error = [NSError
+             errorWithDomain:@"OCRErrorDomain"
+                        code:[errorCode intValue]
+                    userInfo:@{
+                      NSLocalizedDescriptionKey : [NSString
+                          stringWithFormat:@"%ld", (long)[errorCode longValue]]
+                    }];
+         reject(@"init_module_error", @"Failed to initialize detector module",
+                error);
+         return;
       }
+       [Fetcher fetchResource:[NSURL URLWithString:languageDictPath]
+                 resourceType:ResourceType::TXT
+            completionHandler:^(NSString *filePath, NSError *error) {
+              if (error) {
+                reject(@"init_module_error",
+                       @"Failed to initialize converter module", error);
+                return;
+              }
+
+              self->recognitionHandler =
+                  [[RecognitionHandler alloc] initWithSymbols:symbols
+                                             languageDictPath:filePath];
+              [self->recognitionHandler
+                   loadRecognizers:recognizerSourceLarge
+              mediumRecognizerPath:recognizerSourceMedium
+               smallRecognizerPath:recognizerSourceSmall
+                        completion:^(BOOL allModelsLoaded,
+                                     NSNumber *errorCode) {
+                          if (allModelsLoaded) {
+                            resolve(@(YES));
+                          } else {
+                            NSError *error = [NSError
+                                errorWithDomain:@"OCRErrorDomain"
+                                           code:[errorCode intValue]
+                                       userInfo:@{
+                                         NSLocalizedDescriptionKey :
+                                             [NSString stringWithFormat:
+                                                           @"%ld",
+                                                           (long)[errorCode
+                                                                     longValue]]
+                                       }];
+                            reject(@"init_recognizer_error",
+                                   @"Failed to initialize one or more "
+                                   @"recognizer models",
+                                   error);
+                          }
+                        }];
+            }];
 }];
-  }];
-  }];
 }
 
 - (void)forward:(NSString *)input
        resolve:(RCTPromiseResolveBlock)resolve
         reject:(RCTPromiseRejectBlock)reject {
   /*
   The OCR consists of two phases:
-  1. Detection - detecting text regions in the image, the result of this phase is a list of bounding boxes.
-  2. Recognition - recognizing the text in the bounding boxes, the result is a list of strings and corresponding confidence scores.
-
-  Recognition uses three models, each model is responsible for recognizing text of different sizes (e.g. large - 512x64, medium - 256x64, small - 128x64).
+  1. Detection - detecting text regions in the image, the result of this phase
+  is a list of bounding boxes.
+  2. Recognition - recognizing the text in the bounding boxes, the result is a
+  list of strings and corresponding confidence scores.
+
+  Recognition uses three models, each model is responsible for recognizing text
+  of different sizes (e.g. large - 512x64, medium - 256x64, small - 128x64).
   */
   @try {
     cv::Mat image = [ImageProcessor readImage:input];
-    NSArray* result = [detector runModel:image];
+    NSArray *result = [detector runModel:image];
     cv::Size detectorSize = [detector getModelImageSize];
     cv::cvtColor(image, image, cv::COLOR_BGR2GRAY);
-    result = [self->recognitionHandler recognize:result imgGray:image desiredWidth:detectorSize.width * recognizerRatio desiredHeight:detectorSize.height * recognizerRatio];
+    result = [self->recognitionHandler
+           recognize:result
+             imgGray:image
+        desiredWidth:detectorSize.width * recognizerRatio
+       desiredHeight:detectorSize.height * recognizerRatio];
     resolve(result);
   } @catch (NSException *exception) {
-    reject(@"forward_error", [NSString stringWithFormat:@"%@", exception.reason],
-           nil);
+    reject(@"forward_error",
+           [NSString stringWithFormat:@"%@", exception.reason], nil);
   }
 }
 
 - (std::shared_ptr<facebook::react::TurboModule>)getTurboModule:
-    (const facebook::react::ObjCTurboModule::InitParams &)params {
-  return std::make_shared<facebook::react::NativeOCRSpecJSI>(
-      params);
+    (const facebook::react::ObjCTurboModule::InitParams &)params {
+  return std::make_shared<facebook::react::NativeOCRSpecJSI>(params);
 }
 
 @end
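The forward: comment above describes the two-phase pipeline, but this diff does not show how a detected box is routed to the large, medium, or small recognizer. As a hedged sketch only, with a hypothetical function name and width cutoffs inferred from the 512/256/128 input widths named in the comment:

#import <Foundation/Foundation.h>

// Hypothetical dispatch (not code from this repository): pick a recognizer
// for a text crop by width, assuming crops are normalized to 64 px height.
static NSString *recognizerForCropWidth(int cropWidth) {
  if (cropWidth > 256) {
    return @"large"; // handled by the 512x64 model
  }
  if (cropWidth > 128) {
    return @"medium"; // handled by the 256x64 model
  }
  return @"small"; // handled by the 128x64 model
}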

ios/RnExecutorch/models/ocr/Detector.h (+1 -1)

@@ -1,6 +1,6 @@
-#import "opencv2/opencv.hpp"
 #import "BaseModel.h"
 #import "RecognitionHandler.h"
+#import "opencv2/opencv.hpp"
 
 constexpr CGFloat textThreshold = 0.4;
 constexpr CGFloat linkThreshold = 0.4;
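The thresholds declared here feed the detector's postprocessing in Detector.mm below. A minimal sketch of how textThreshold could binarize the character score map, assuming plain OpenCV thresholding; the real DetectorUtils pipeline also uses the affinity map and connected-component grouping:

#import "opencv2/opencv.hpp"

// Hedged sketch: mark pixels of the floating-point score map that score
// above the threshold (0.4 here) as text candidates.
static cv::Mat binarizeScoreMap(const cv::Mat &scoreText, double threshold) {
  cv::Mat binaryText;
  cv::threshold(scoreText, binaryText, threshold, 1.0, cv::THRESH_BINARY);
  return binaryText;
}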

ios/RnExecutorch/models/ocr/Detector.mm (+42 -24)

@@ -4,72 +4,90 @@
 #import "utils/OCRUtils.h"
 
 /*
-The model used as detector is based on the CRAFT (Character Region Awareness for Text Detection) paper.
-https://arxiv.org/pdf/1904.01941
+The model used as detector is based on the CRAFT (Character Region Awareness
+for Text Detection) paper. https://arxiv.org/pdf/1904.01941
 */
 
 @implementation Detector {
   cv::Size originalSize;
   cv::Size modelSize;
 }
 
-- (cv::Size)getModelImageSize{
-  if(!modelSize.empty()) {
+- (cv::Size)getModelImageSize {
+  if (!modelSize.empty()) {
     return modelSize;
   }
-
-  NSArray *inputShape = [module getInputShape: @0];
+
+  NSArray *inputShape = [module getInputShape:@0];
   NSNumber *widthNumber = inputShape.lastObject;
   NSNumber *heightNumber = inputShape[inputShape.count - 2];
-
+
   const int height = [heightNumber intValue];
   const int width = [widthNumber intValue];
   modelSize = cv::Size(height, width);
-
+
   return cv::Size(height, width);
 }
 
 - (NSArray *)preprocess:(cv::Mat &)input {
   /*
   Detector as an input accepts a tensor with a shape of [1, 3, 800, 800].
-  Due to the big influence of resizing on recognition quality, the image preserves
-  the original aspect ratio and the missing parts are filled with padding.
+  Due to the big influence of resizing on recognition quality, the image
+  preserves the original aspect ratio and the missing parts are filled with
+  padding.
   */
   self->originalSize = cv::Size(input.cols, input.rows);
-
+
   cv::Size modelImageSize = [self getModelImageSize];
   cv::Mat resizedImage;
-  resizedImage = [OCRUtils resizeWithPadding:input desiredWidth:modelImageSize.width desiredHeight:modelImageSize.height];
-  NSArray *modelInput = [ImageProcessor matToNSArray: resizedImage mean:mean variance:variance];
+  resizedImage = [OCRUtils resizeWithPadding:input
+                                desiredWidth:modelImageSize.width
+                               desiredHeight:modelImageSize.height];
+  NSArray *modelInput = [ImageProcessor matToNSArray:resizedImage
+                                                mean:mean
+                                            variance:variance];
   return modelInput;
 }
 
 - (NSArray *)postprocess:(NSArray *)output {
   /*
   The output of the model consists of two matrices (heat maps):
   1. ScoreText(Score map) - The probability of a region containing a character
-  2. ScoreAffinity(Affinity map) - affinity between characters, used to group each character into a single instance (sequence)
-  Both matrices are 400x400
-
+  2. ScoreAffinity(Affinity map) - affinity between characters, used to
+  group each character into a single instance (sequence). Both matrices are
+  400x400.
+
   The result of this step is a list of bounding boxes that contain text.
   */
   NSArray *predictions = [output objectAtIndex:0];
-
+
   cv::Size modelImageSize = [self getModelImageSize];
   cv::Mat scoreTextCV, scoreAffinityCV;
   /*
-  The output of the model is a matrix the size of the input image containing two matrices representing heatmaps.
-  Those two matrices are half the size of the input image, that's why the width and height are divided by 2.
+  The output of the model is a matrix the size of the input image containing
+  two matrices representing heatmaps. Those two matrices are half the size of
+  the input image, that's why the width and height are divided by 2.
   */
   [DetectorUtils interleavedArrayToMats:predictions
                              outputMat1:scoreTextCV
                              outputMat2:scoreAffinityCV
-                               withSize:cv::Size(modelImageSize.width / 2, modelImageSize.height / 2)];
-  NSArray* bBoxesList = [DetectorUtils getDetBoxesFromTextMap:scoreTextCV affinityMap:scoreAffinityCV usingTextThreshold:textThreshold linkThreshold:linkThreshold lowTextThreshold:lowTextThreshold];
+                               withSize:cv::Size(modelImageSize.width / 2,
+                                                 modelImageSize.height / 2)];
+  NSArray *bBoxesList = [DetectorUtils getDetBoxesFromTextMap:scoreTextCV
+                                                  affinityMap:scoreAffinityCV
+                                           usingTextThreshold:textThreshold
+                                                linkThreshold:linkThreshold
+                                             lowTextThreshold:lowTextThreshold];
   NSLog(@"Detected boxes: %lu", (unsigned long)bBoxesList.count);
-  bBoxesList = [DetectorUtils restoreBboxRatio:bBoxesList usingRestoreRatio: restoreRatio];
-  bBoxesList = [DetectorUtils groupTextBoxes:bBoxesList centerThreshold:centerThreshold distanceThreshold:distanceThreshold heightThreshold:heightThreshold minSideThreshold:minSideThreshold maxSideThreshold:maxSideThreshold maxWidth:maxWidth];
-
+  bBoxesList = [DetectorUtils restoreBboxRatio:bBoxesList
+                             usingRestoreRatio:restoreRatio];
+  bBoxesList = [DetectorUtils groupTextBoxes:bBoxesList
+                             centerThreshold:centerThreshold
+                           distanceThreshold:distanceThreshold
+                             heightThreshold:heightThreshold
+                            minSideThreshold:minSideThreshold
+                            maxSideThreshold:maxSideThreshold
+                                    maxWidth:maxWidth];
+
   return bBoxesList;
 }
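The preprocess comment in this file says the image keeps its original aspect ratio and the remainder is filled with padding. A minimal sketch of that idea, assuming black padding added on the right and bottom; the actual [OCRUtils resizeWithPadding:desiredWidth:desiredHeight:] may place or fill the padding differently:

#import "opencv2/opencv.hpp"
#include <algorithm>
#include <cmath>

// Hedged sketch: scale the image to fit inside the target size without
// distortion, then pad the remaining area with black pixels.
static cv::Mat resizeKeepingAspect(const cv::Mat &input, int desiredWidth,
                                   int desiredHeight) {
  const double scale = std::min(desiredWidth / (double)input.cols,
                                desiredHeight / (double)input.rows);
  cv::Mat resized;
  cv::resize(input, resized,
             cv::Size((int)std::round(input.cols * scale),
                      (int)std::round(input.rows * scale)));
  cv::Mat padded;
  cv::copyMakeBorder(resized, padded, /*top=*/0,
                     /*bottom=*/desiredHeight - resized.rows, /*left=*/0,
                     /*right=*/desiredWidth - resized.cols, cv::BORDER_CONSTANT,
                     cv::Scalar(0, 0, 0));
  return padded;
}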
ios/RnExecutorch/models/ocr/RecognitionHandler.h (+10 -3)

@@ -9,8 +9,15 @@ constexpr CGFloat adjustContrast = 0.2;
 
 @interface RecognitionHandler : NSObject
 
-- (instancetype)initWithSymbols:(NSString *)symbols languageDictPath:(NSString *)languageDictPath;
-- (void)loadRecognizers:(NSString *)largeRecognizerPath mediumRecognizerPath:(NSString *)mediumRecognizerPath smallRecognizerPath:(NSString *)smallRecognizerPath completion:(void (^)(BOOL, NSNumber *))completion;
-- (NSArray *)recognize:(NSArray<NSDictionary *> *)bBoxesList imgGray:(cv::Mat)imgGray desiredWidth:(int)desiredWidth desiredHeight:(int)desiredHeight;
+- (instancetype)initWithSymbols:(NSString *)symbols
+               languageDictPath:(NSString *)languageDictPath;
+- (void)loadRecognizers:(NSString *)largeRecognizerPath
+    mediumRecognizerPath:(NSString *)mediumRecognizerPath
+     smallRecognizerPath:(NSString *)smallRecognizerPath
+              completion:(void (^)(BOOL, NSNumber *))completion;
+- (NSArray *)recognize:(NSArray<NSDictionary *> *)bBoxesList
+               imgGray:(cv::Mat)imgGray
+          desiredWidth:(int)desiredWidth
+         desiredHeight:(int)desiredHeight;
 
 @end
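Given the interface above, a caller loads the three recognizers and waits for the completion block, much as OCR.mm does. A hedged usage sketch; the symbol string and file paths are placeholders, not resources shipped with the library:

RecognitionHandler *handler = [[RecognitionHandler alloc]
    initWithSymbols:@"0123456789abcdefghijklmnopqrstuvwxyz" // placeholder set
   languageDictPath:@"/tmp/lang_dict.txt"];                 // placeholder path
[handler loadRecognizers:@"/tmp/recognizer_large.pte" // placeholder paths
    mediumRecognizerPath:@"/tmp/recognizer_medium.pte"
     smallRecognizerPath:@"/tmp/recognizer_small.pte"
              completion:^(BOOL allModelsLoaded, NSNumber *errorCode) {
                // Surface the result to JS here, as OCR.mm does with
                // resolve/reject.
              }];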
