-
Notifications
You must be signed in to change notification settings - Fork 410
/
Copy pathImport-Excel.ps1
264 lines (254 loc) · 14.1 KB
/
Import-Excel.ps1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
function Import-Excel {
[CmdLetBinding()]
[Diagnostics.CodeAnalysis.SuppressMessageAttribute("PSAvoidUsingPlainTextForPassword", "")]
[Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSPossibleIncorrectUsageOfAssignmentOperator', '', Justification = 'Intentional')]
param (
[Alias('FullName')]
[Parameter(ParameterSetName = "PathA", Mandatory, ValueFromPipelineByPropertyName, ValueFromPipeline, Position = 0 )]
[Parameter(ParameterSetName = "PathB", Mandatory, ValueFromPipelineByPropertyName, ValueFromPipeline, Position = 0 )]
[Parameter(ParameterSetName = "PathC", Mandatory, ValueFromPipelineByPropertyName, ValueFromPipeline, Position = 0 )]
[String[]]$Path,
[Parameter(ParameterSetName = "PackageA", Mandatory)]
[Parameter(ParameterSetName = "PackageB", Mandatory)]
[Parameter(ParameterSetName = "PackageC", Mandatory)]
[OfficeOpenXml.ExcelPackage]$ExcelPackage,
[Alias('Sheet')]
[Parameter(Position = 1)]
[ValidateNotNullOrEmpty()]
[String[]]$WorksheetName,
[Parameter(ParameterSetName = 'PathB' , Mandatory)]
[Parameter(ParameterSetName = 'PackageB', Mandatory)]
[String[]]$HeaderName ,
[Parameter(ParameterSetName = 'PathC' , Mandatory)]
[Parameter(ParameterSetName = 'PackageC', Mandatory)]
[Switch]$NoHeader ,
[Alias('HeaderRow', 'TopRow')]
[ValidateRange(1, 1048576)]
[Int]$StartRow = 1,
[Alias('StopRow', 'BottomRow')]
[Int]$EndRow ,
[Alias('LeftColumn')]
[Int]$StartColumn = 1,
[Alias('RightColumn')]
[Int]$EndColumn ,
[Switch]$DataOnly,
[string[]]$AsText,
[string[]]$AsDate,
[ValidateNotNullOrEmpty()]
[String]$Password,
[Int[]]$ImportColumns,
[Switch]$Raw
)
end {
$sw = [System.Diagnostics.Stopwatch]::StartNew()
if ($input) {
$Paths = $input
}
elseif ($Path) {
$Paths = $Path
}
else {
$Paths = ''
}
function Get-PropertyNames {
<#
.SYNOPSIS
Create objects containing the column number and the column name for each of the different header types.
#>
[Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseSingularNouns', '', Justification = "Name would be incorrect, and command is not exported")]
param(
[Parameter(Mandatory)]
[Int[]]$Columns,
[Parameter(Mandatory)]
[Int]$StartRow
)
try {
if ($ImportColumns) {
$end = $sheet.Dimension.End.Column
# Check $ImportColumns
if ($ImportColumns[0] -le 0) { throw "The first entry in ImportColumns must be equal or greater 1" ; return }
# Check $StartColumn and $EndColumn
if (($StartColumn -ne 1) -or ($EndColumn -ne $end)) { Write-Warning -Message "If ImportColumns is set, then individual StartColumn and EndColumn will be ignored." }
# Replace $Columns with $ImportColumns
$Columns = $ImportColumns
}
if ($HeaderName) {
$i = 0
foreach ($H in $HeaderName) {
$H | Select-Object @{N = 'Column'; E = { $Columns[$i] } }, @{N = 'Value'; E = { $H } }
$i++
}
}
elseif ($NoHeader) {
$i = 0
foreach ($C in $Columns) {
$i++
$C | Select-Object @{N = 'Column'; E = { $_ } }, @{N = 'Value'; E = { 'P' + $i } }
}
}
else {
if ($StartRow -lt 1) {
throw 'The top row can never be less than 1 when we need to retrieve headers from the worksheet.' ; return
}
foreach ($C in $Columns) {
#allow "False" or "0" to be column headings
$sheet.Cells[$StartRow, $C] | Where-Object { -not [string]::IsNullOrEmpty($_.Value) } | Select-Object @{N = 'Column'; E = { $C } }, Value
}
}
}
catch {
throw "Failed creating property names: $_" ; return
}
}
foreach ($Path in $Paths) {
if ($path) {
$extension = [System.IO.Path]::GetExtension($Path)
if ($extension -notmatch '.xlsx$|.xlsm$') {
throw "Import-Excel does not support reading this extension type $($extension)"
}
$resolvedPath = (Resolve-Path $Path -ErrorAction SilentlyContinue)
if ($resolvedPath) {
$Path = $resolvedPath.ProviderPath
}
else {
throw "'$($Path)' file not found"
}
$stream = New-Object -TypeName System.IO.FileStream -ArgumentList $Path, 'Open', 'Read', 'ReadWrite'
$ExcelPackage = New-Object -TypeName OfficeOpenXml.ExcelPackage
if ($Password) { $ExcelPackage.Load($stream, $Password) }
else { $ExcelPackage.Load($stream) }
}
try {
#Select worksheet
if ($WorksheetName -eq '*') { $Worksheet = $ExcelPackage.Workbook.Worksheets }
elseif (-not $WorksheetName) { $Worksheet = $ExcelPackage.Workbook.Worksheets[1] }
elseif (-not ($Worksheet = $ExcelPackage.Workbook.Worksheets[$WorksheetName])) {
throw "Worksheet '$WorksheetName' not found, the workbook only contains the worksheets '$($ExcelPackage.Workbook.Worksheets)'. If you only wish to select the first worksheet, please remove the '-WorksheetName' parameter." ; return
}
$xlBook = [Ordered]@{}
foreach ($sheet in $Worksheet) {
if ($Worksheet.Count -gt 1 -or $Paths.Count -gt 1) {
<#
Needed under these conditions to handle sheets of different number of Row/Col
- When reading more than one xlsx file
- When reading more than one worksheet in the same file
#>
$EndRow = $null
$EndColumn = $null
}
$targetSheetname = $sheet.Name
$xlBook["$targetSheetname"] = @()
#region Get rows and columns
#If we are doing dataonly it is quicker to work out which rows to ignore before processing the cells.
if (-not $EndRow ) { $EndRow = $sheet.Dimension.End.Row }
if (-not $EndColumn) { $EndColumn = $sheet.Dimension.End.Column }
$endAddress = [OfficeOpenXml.ExcelAddress]::TranslateFromR1C1("R[$EndRow]C[$EndColumn]", 0, 0)
if ($DataOnly) {
#If we are using headers startrow will be the header-row so examine data from startRow + 1,
if ($NoHeader) { $range = "A" + ($StartRow ) + ":" + $endAddress }
else { $range = "A" + ($StartRow + 1 ) + ":" + $endAddress }
#We're going to look at every cell and build 2 hash tables holding rows & columns which contain data.
#Want to Avoid 'select unique' operations & large Sorts, becuse time time taken increases with square
#of number of items (PS uses heapsort at large size). Instead keep a list of what we have seen,
#using Hash tables: "we've seen it" is all we need, no need to worry about "seen it before" / "Seen it many times".
$colHash = @{ }
$rowHash = @{ }
foreach ($cell in $sheet.Cells[$range]) {
if ($null -ne $cell.Value ) { $colHash[$cell.Start.Column] = 1; $rowHash[$cell.Start.row] = 1 }
}
$rows = ( $StartRow..$EndRow ).Where( { $rowHash[$_] })
$columns = ($StartColumn..$EndColumn).Where( { $colHash[$_] })
}
else {
$Columns = $StartColumn .. $EndColumn ; if ($StartColumn -gt $EndColumn) { Write-Warning -Message "Selecting columns $StartColumn to $EndColumn might give odd results." }
if ($NoHeader) { $rows = $StartRow..$EndRow ; if ($StartRow -gt $EndRow) { Write-Warning -Message "Selecting rows $StartRow to $EndRow might give odd results." } }
elseif ($HeaderName) { $rows = $StartRow..$EndRow }
else {
$rows = (1 + $StartRow)..$EndRow
if ($StartRow -eq 1 -and $EndRow -eq 1) {
$rows = 0
}
}
# ; if ($StartRow -ge $EndRow) { Write-Warning -Message "Selecting $StartRow as the header with data in $(1+$StartRow) to $EndRow might give odd results." } }
}
#endregion
#region Create property names
if ((-not $Columns) -or (-not ($PropertyNames = Get-PropertyNames -Columns $Columns -StartRow $StartRow))) {
throw "No column headers found on top row '$StartRow'. If column headers in the worksheet are not a requirement then please use the '-NoHeader' or '-HeaderName' parameter."; return
}
if ($Duplicates = $PropertyNames | Group-Object Value | Where-Object Count -GE 2) {
throw "Duplicate column headers found on row '$StartRow' in columns '$($Duplicates.Group.Column)'. Column headers must be unique, if this is not a requirement please use the '-NoHeader' or '-HeaderName' parameter."; return
}
#endregion
if (-not $rows) {
Write-Warning "Worksheet '$WorksheetName' in workbook '$Path' contains no data in the rows after top row '$StartRow'"
}
else {
#region Create one object per row
if ($AsText -or $AsDate) {
<#join items in AsText together with ~~~ . Escape any regex special characters...
# which turns "*" into "\*" make it ".*". Convert ~~~ to $|^ and top and tail with ^%;
So if we get "Week", "[Time]" and "*date*" ; make the expression ^week$|^\[Time\]$|^.*Date.*$
$make a regex for this which is case insensitive (option 1) and compiled (option 8)
#>
$TextColExpression = ''
if ($AsText) {
$TextColExpression += '(?<astext>^' + [regex]::Escape($AsText -join '~~~').replace('\*', '.*').replace('~~~', '$|^') + '$)'
}
if ($AsText -and $AsDate) {
$TextColExpression += "|"
}
if ($AsDate) {
$TextColExpression += '(?<asDate>^' + [regex]::Escape($AsDate -join '~~~').replace('\*', '.*').replace('~~~', '$|^') + '$)'
}
$TextColRegEx = New-Object -TypeName regex -ArgumentList $TextColExpression , 9
}
else { $TextColRegEx = $null }
foreach ($R in $rows) {
#Disabled write-verbose for speed
# Write-Verbose "Import row '$R'"
$NewRow = [Ordered]@{ }
if ($TextColRegEx) {
foreach ($P in $PropertyNames) {
$MatchTest = $TextColRegEx.Match($P.value)
if ($MatchTest.groups.name -eq "astext") {
$NewRow[$P.Value] = $sheet.Cells[$R, $P.Column].Text
}
elseif ($MatchTest.groups.name -eq "asdate" -and $sheet.Cells[$R, $P.Column].Value -is [System.ValueType]) {
$NewRow[$P.Value] = [datetime]::FromOADate(($sheet.Cells[$R, $P.Column].Value))
}
else { $NewRow[$P.Value] = $sheet.Cells[$R, $P.Column].Value }
}
}
else {
foreach ($P in $PropertyNames) {
$NewRow[$P.Value] = $sheet.Cells[$R, $P.Column].Value
# Write-Verbose "Import cell '$($Worksheet.Cells[$R, $P.Column].Address)' with property name '$($p.Value)' and value '$($Worksheet.Cells[$R, $P.Column].Value)'."
}
}
$xlBook["$targetSheetname"] += [PSCustomObject]$NewRow
}
#endregion
}
}
}
catch { throw "Failed importing the Excel workbook '$Path' with worksheet '$WorksheetName': $_"; return }
finally {
# $EndRow = 0
# $EndColumn = 0
if ($Path) { $stream.close(); $ExcelPackage.Dispose() }
if ($Raw) {
foreach ($entry in $xlbook.GetEnumerator()) {
$entry.Value
}
}
elseif ($Worksheet.Count -eq 1) {
$xlBook["$targetSheetname"]
}
else {
$xlBook
}
}
}
}
}