-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathBlobTriggerProcessFile.cs
142 lines (118 loc) · 5.72 KB
/
BlobTriggerProcessFile.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
using Azure;
using Azure.AI.FormRecognizer.DocumentAnalysis;
using Azure.Storage.Blobs;
using Azure.Storage.Blobs.Models;
using Microsoft.Azure.WebJobs;
using Microsoft.Azure.WebJobs.Host;
using Microsoft.Extensions.Logging;
using Newtonsoft.Json;
using System;
using System.IO;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;
using System.Transactions;
using Company.Function.Models;
using System.Collections.Generic;
namespace Company.Function
{
/// <summary>
/// Blob-triggered function that sends a blob from the <c>raw</c> container to the
/// "prebuilt-read" document analysis model and stores the extracted text as JSON blobs.
/// </summary>
public class BlobTriggerProcessFile
{
    /// <summary>
    /// Analyzes the triggering blob and writes one JSON file per page, or one per
    /// non-empty paragraph when no page contained any line.
    /// </summary>
    /// <param name="myBlob">Stream of the triggering blob; only its length is read here.</param>
    /// <param name="name">Blob name bound from the <c>raw/{name}</c> trigger path.</param>
    /// <param name="log">Logger supplied by the Functions host.</param>
    /// <remarks>
    /// Every exception is caught and logged, so this method does not propagate failures
    /// to its caller.
    /// </remarks>
    [FunctionName("BlobTriggerProcessFile")]
    public async Task RunAsync([BlobTrigger("raw/{name}", Connection = "StorageConnectionString")]Stream myBlob, string name, ILogger log)
    {
        try
        {
            log.LogInformation($"C# Blob trigger function Processed blob\n Name:{name} \n Size: {myBlob.Length} Bytes");
            string subscriptionKey = Environment.GetEnvironmentVariable("DocumentIntelligenceSubscriptionKey") ?? "Default Sub";
            string endpoint = Environment.GetEnvironmentVariable("DocumentIntelligenceEndpoint") ?? "Default End";
            // The subscription key is a secret, so it is deliberately kept out of the logs.
            log.LogInformation($"endpoint = {endpoint}");
            AzureKeyCredential credential = new AzureKeyCredential(subscriptionKey);
            DocumentAnalysisClient client = new DocumentAnalysisClient(new Uri(endpoint), credential);

            // Escape the blob name so characters such as '#', '?' or '%' stay part of the path.
            string imgUrl = $"https://{Environment.GetEnvironmentVariable("StorageAccount")}.blob.core.windows.net/raw/{EscapeBlobPath(name)}";
            log.LogInformation(imgUrl);
            Uri fileUri = new Uri(imgUrl);
            log.LogInformation("About to get data from document intelligence module.");
            AnalyzeDocumentOperation operation = await client.AnalyzeDocumentFromUriAsync(WaitUntil.Completed, "prebuilt-read", fileUri);
            AnalyzeResult result = operation.Value;
            bool contentFound = false;
            var tasks = new List<Task<bool>>();

            // Split by page if there is content...
            foreach (DocumentPage page in result.Pages)
            {
                log.LogInformation("Checking out document data...");
                // NOTE(review): lines are joined without any separator, as before - confirm this is intended.
                var pageText = new StringBuilder();
                for (int i = 0; i < page.Lines.Count; i++)
                {
                    DocumentLine line = page.Lines[i];
                    log.LogDebug($" Line {i} has content: '{line.Content}'.");
                    pageText.Append(line.Content);
                    contentFound = true;
                }
                string content = pageText.ToString();
                log.LogInformation("content = " + content);
                tasks.Add(WriteAnalysisContent(name, page.PageNumber, content, log));
            }

            // Otherwise, split by paragraphs
            if (!contentFound && result.Paragraphs != null)
            {
                var counter = 0;
                foreach (DocumentParagraph paragraph in result.Paragraphs)
                {
                    if (paragraph != null && !string.IsNullOrWhiteSpace(paragraph.Content))
                    {
                        tasks.Add(WriteAnalysisContent(name, counter, paragraph.Content, log));
                        counter++;
                    }
                }
            }

            // Await instead of blocking a thread, and surface how many uploads failed.
            bool[] outcomes = await Task.WhenAll(tasks);
            int failures = 0;
            foreach (bool saved in outcomes)
            {
                if (!saved)
                {
                    failures++;
                }
            }
            if (failures > 0)
            {
                log.LogWarning("{Failed} of {Total} extracted content files could not be saved for blob {Name}.", failures, outcomes.Length, name);
            }
        }
        catch (Exception ex)
        {
            // Pass the exception itself so the type and stack trace reach the log sink.
            log.LogError(ex, "{Message}", ex.Message);
        }
    }

    /// <summary>
    /// Percent-encodes each '/'-separated segment of a blob name for use in a URL path.
    /// </summary>
    /// <param name="blobName">Blob name, optionally containing '/' separators.</param>
    /// <returns>The blob name with every segment escaped and the separators preserved.</returns>
    private static string EscapeBlobPath(string blobName)
    {
        string[] segments = blobName.Split('/');
        for (int i = 0; i < segments.Length; i++)
        {
            segments[i] = Uri.EscapeDataString(segments[i]);
        }
        return string.Join("/", segments);
    }

    /// <summary>
    /// Serializes one piece of extracted content to JSON and uploads it to the container
    /// named by the <c>ExtractedContainerName</c> environment variable.
    /// </summary>
    /// <param name="name">Name of the source blob; used to derive the output blob name.</param>
    /// <param name="counter">Page number or paragraph index, zero-padded to four digits in the file name.</param>
    /// <param name="content">Extracted text to store.</param>
    /// <param name="log">Logger for success and failure messages.</param>
    /// <returns><c>true</c> when the upload succeeded; <c>false</c> when any exception was caught.</returns>
    private async Task<bool> WriteAnalysisContent(string name, int counter, string content, ILogger log)
    {
        try
        {
            // Output blob is "<source name without extension>/<same name, dots replaced>_<counter>.json".
            string nameWithoutExtension = Path.GetFileNameWithoutExtension(name);
            string newName = nameWithoutExtension.Replace(".", "_");
            newName += $"_{counter.ToString().PadLeft(4,'0')}.json";
            string blobName = nameWithoutExtension + "/" + newName;
            var jsonObj = new ProcessedFile
            {
                FileName = name,
                BlobName = blobName,
                Content = content
            };
            string jsonStr = JsonConvert.SerializeObject(jsonObj);

            // Save the JSON string to Azure Blob Storage
            string connectionString = Environment.GetEnvironmentVariable("StorageConnectionString") ?? "DefaultConnection";
            string containerName = Environment.GetEnvironmentVariable("ExtractedContainerName") ?? "DefaultContainer";
            BlobServiceClient blobServiceClient = new BlobServiceClient(connectionString);
            BlobContainerClient containerClient = blobServiceClient.GetBlobContainerClient(containerName);
            await containerClient.CreateIfNotExistsAsync();
            BlobClient blobClient = containerClient.GetBlobClient(blobName);

            byte[] jsonBytes = Encoding.UTF8.GetBytes(jsonStr);
            using (var stream = new MemoryStream(jsonBytes))
            {
                await blobClient.UploadAsync(stream, overwrite: true);
            }
            log.LogInformation($"JSON file {newName} saved to Azure Blob Storage.");
            return true;
        }
        catch (Exception exe)
        {
            // Pass the exception itself so the type and stack trace reach the log sink.
            log.LogError(exe, "Unable to save file: {Message}", exe.Message);
            return false;
        }
    }
}
}