forked from microsoft/kernel-memory
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathProgram.cs
96 lines (80 loc) · 3.56 KB
/
Program.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
// Copyright (c) Microsoft. All rights reserved.
using Microsoft.KernelMemory.DataFormats;
using Microsoft.KernelMemory.DataFormats.Office;
using Microsoft.KernelMemory.DataFormats.Pdf;
// ===================================================================================================================
// MS Word example
// Prints the text of every section extracted from mswordfile.docx, then waits for the user to press Enter.
Console.WriteLine("===============================");
Console.WriteLine("=== Text in mswordfile.docx ===");
Console.WriteLine("===============================");

// `content` is declared here and reassigned by each later example in this file, so it is
// initialized directly from the decoder rather than with a throwaway `new()` instance.
FileContent content = new MsWordDecoder().ExtractContent("mswordfile.docx");

// Read the section count once; it is the denominator of every "Page: n/total" line.
int wordSectionCount = content.Sections.Count;
foreach (FileSection wordSection in content.Sections)
{
    Console.WriteLine($"Page: {wordSection.Number}/{wordSectionCount}");
    Console.WriteLine(wordSection.Content);
    Console.WriteLine("-----");
}

// Pause so the output can be read before the next example starts printing.
Console.WriteLine("============================");
Console.WriteLine("Press Enter to continue...");
Console.ReadLine();
// ===================================================================================================================
// MS PowerPoint example
// Prints the text of every section extracted from mspowerpointfile.pptx, then waits for the user to press Enter.
Console.WriteLine("===============================");
Console.WriteLine("=== Text in mspowerpointfile.pptx ===");
Console.WriteLine("===============================");

// Options are passed by name. NOTE(review): presumably these prefix each slide with its number,
// omit end-of-slide markers and ignore hidden slides - confirm against MsPowerPointDecoder.
content = new MsPowerPointDecoder().ExtractContent(
    "mspowerpointfile.pptx",
    withSlideNumber: true,
    withEndOfSlideMarker: false,
    skipHiddenSlides: true);

// Read the section count once; it is the denominator of every "Slide: n/total" line.
int slideSectionCount = content.Sections.Count;
foreach (FileSection slideSection in content.Sections)
{
    Console.WriteLine($"Slide: {slideSection.Number}/{slideSectionCount}");
    Console.WriteLine(slideSection.Content);
    Console.WriteLine("-----");
}

// Pause so the output can be read before the next example starts printing.
Console.WriteLine("============================");
Console.WriteLine("Press Enter to continue...");
Console.ReadLine();
// ===================================================================================================================
// MS Excel example
// Prints the text of every section extracted from msexcelfile.xlsx, then waits for the user to press Enter.
Console.WriteLine("===============================");
Console.WriteLine("=== Text in msexcelfile.xlsx ===");
Console.WriteLine("===============================");

content = new MsExcelDecoder().ExtractContent("msexcelfile.xlsx");

// Read the section count once; it is the denominator of every "Worksheet: n/total" line.
int worksheetSectionCount = content.Sections.Count;
foreach (FileSection worksheetSection in content.Sections)
{
    Console.WriteLine($"Worksheet: {worksheetSection.Number}/{worksheetSectionCount}");
    Console.WriteLine(worksheetSection.Content);
    Console.WriteLine("-----");
}

// Pause so the output can be read before the next example starts printing.
Console.WriteLine("============================");
Console.WriteLine("Press Enter to continue...");
Console.ReadLine();
// ===================================================================================================================
// PDF example 1, short document
// Prints the text of every section extracted from file1.pdf, then waits for the user to press Enter.
Console.WriteLine("=========================");
Console.WriteLine("=== Text in file1.pdf ===");
Console.WriteLine("=========================");

content = new PdfDecoder().ExtractContent("file1.pdf");

// Read the section count once; it is the denominator of every "Page: n/total" line.
int shortPdfSectionCount = content.Sections.Count;
foreach (FileSection shortPdfSection in content.Sections)
{
    Console.WriteLine($"Page: {shortPdfSection.Number}/{shortPdfSectionCount}");
    Console.WriteLine(shortPdfSection.Content);
    Console.WriteLine("-----");
}

// Pause so the output can be read before the next example starts printing.
Console.WriteLine("============================");
Console.WriteLine("Press Enter to continue...");
Console.ReadLine();
// ===================================================================================================================
// PDF example 2, scanned book
// Dumps what the PDF decoder returns for file2.pdf: one "Page: n/total" header, the section text
// and a "-----" separator per section. This is the last example, so there is no pause afterwards.
Console.WriteLine("=========================");
Console.WriteLine("=== Text in file2.pdf ===");
Console.WriteLine("=========================");

var scannedPdfDecoder = new PdfDecoder();
content = scannedPdfDecoder.ExtractContent("file2.pdf");

// The total does not change while printing, so it is looked up once before the loop.
int scannedPdfSectionCount = content.Sections.Count;
foreach (FileSection scannedPdfSection in content.Sections)
{
    Console.WriteLine($"Page: {scannedPdfSection.Number}/{scannedPdfSectionCount}");
    Console.WriteLine(scannedPdfSection.Content);
    Console.WriteLine("-----");
}