Skip to content

Commit 6a8df8e

Browse files
authored
Merge pull request #788 from pnp/main-import
Added Get Links in Page Sample
2 parents 2f67013 + d975873 commit 6a8df8e

File tree

6 files changed

+239
-0
lines changed

6 files changed

+239
-0
lines changed

Diff for: scripts/spo-find-links-in-canvas/README.md

+110
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
---
2+
plugin: add-to-gallery
3+
---
4+
5+
# Find Links in Modern Page
6+
7+
## Summary
8+
9+
This script takes a CSV file containing the URLs of SharePoint sites and analyses each site's pages to see whether any of them contain hyperlinks.
10+
Every hyperlink found in a page is written as a row to a pipe-delimited CSV file.
11+
12+
The script reads a list of SharePoint sites from a CSV file, connects to each site and extracts all pages within the specified list. For every page whose content contains an anchor tag, it captures the 'Title' field value (from the page metadata) along with any href tags present in its body text using regex matching, then writes these details into a new CSV file named after today’s date.
13+
14+
Note: The preceding paragraph of the description was generated with AI to describe the script.
15+
16+
# [PnP PowerShell](#tab/pnpps)
17+
18+
```powershell
19+
20+
# Scans the Site Pages library of every site listed in a CSV file and writes one
# pipe-delimited row (site title, page title, page url, anchor tag) to an output
# CSV for each anchor tag found in a page's canvas content.

# Each site in this list will have the script run against
$csv_SiteList = "sites-test.csv"
$csv_siteheaders = 'Url'

# Timestamp used in the output file name.
# .NET custom format specifiers are case-sensitive: MM = month, mm = minutes,
# HH = 24-hour clock, hh = 12-hour clock.
$date = Get-Date
$date = $date.ToString("yyyyMMddHHmmss")

# filename by using the date
$file_name = $date + 'LinkMatches.csv'

# Path to create the output file
$creation_path = Get-Location

# The site pages list that this script will run against
$List = "SitePages"

# Headers for the output csv
$headers = "Site Title|Page Title|Page Url|Href Tag"

# new line character
# Deliberately not stored in $ofs: PowerShell variable names are case-insensitive,
# so $ofs is the built-in $OFS output field separator used when arrays are
# converted to strings.
$newline = "`n"

# delimiter to use
$delim = '|'

# regex used to match the opening anchor tags (with a double-quoted href) that are
# embedded in the canvas page content; capture group 2 holds the href value
$regex ='<a\s+(?:[^>]*?\s+)?href=(["])(.*?)\1>'

# create object of all the sites (the csv has no header row, so one is supplied)
$sites = Import-Csv -Path $csv_SiteList -Header $csv_siteheaders

# variable for the header
$csv_outputheader = $headers + $newline

# complete file path
$csv_path = Join-Path $creation_path $file_name

# create output csv, seeded with the header row
New-Item -Path $creation_path -Name $file_name -ItemType File -Value $csv_outputheader

# iterate around each site from the csv
foreach($site in $sites)
{
    # make the connection, get some site information and create object that contains all the site pages
    # NOTE(review): Connect-PnPOnline appears to emit a connection object only when
    # -ReturnConnection is passed, so $connection may be $null here and the cmdlets
    # below would then fall back to the current connection - confirm against the
    # PnP.PowerShell version in use.
    $connection = Connect-PnPOnline -Url $site.Url -Interactive
    $pnpsite = Get-PnPWeb -Connection $connection
    $site_title = $pnpsite.Title
    $pages = (Get-PnPListItem -List $List -Fields "CanvasContent1", "Title" -Connection $connection).FieldValues

    # iterate around each page in the site to get the information from each page that will be used to build up the row and also conduct
    # the check to see if the canvas content has any href tags embedded
    foreach($page in $pages)
    {
        $page_title = $page.Get_Item("Title")
        $fileref = $page.Get_Item("FileRef")
        $canvascontent = $page.Get_Item("CanvasContent1")

        # check if the canvas has content
        if ($canvascontent.Length -gt 0)
        {
            # text of every anchor tag in the canvas that matches the href regular expression
            $hrefmatches = ($canvascontent | select-string -pattern $regex -AllMatches).Matches.Value

            # iterate around each regular expression match and write it out into the output csv that is pipe delimited
            foreach($hrefmatch in $hrefmatches)
            {
                $row = $site_title + $delim + $page_title + $delim + $fileref + $delim + $hrefmatch
                Add-Content -Path $csv_path -Value $row
            }
        }
    }
    Disconnect-PnPOnline
}
93+
94+
```
95+
[!INCLUDE [More about PnP PowerShell](../../docfx/includes/MORE-PNPPS.md)]
96+
***
97+
98+
## Source
99+
100+
This script was first created on PnP PowerShell and transferred over in Dec 2024. Details of the original author are missing. Report if inaccurate.
101+
https://github.com/pnp/powershell
102+
103+
## Contributors
104+
105+
| Author(s) |
106+
|-----------|
107+
| Paul Bullock |
108+
109+
[!INCLUDE [DISCLAIMER](../../docfx/includes/DISCLAIMER.md)]
110+
<img src="https://m365-visitor-stats.azurewebsites.net/script-samples/scripts/spo-find-links-in-canvas" aria-hidden="true" />

Diff for: scripts/spo-find-links-in-canvas/assets/example.png

328 KB
Loading

Diff for: scripts/spo-find-links-in-canvas/assets/preview.png

58.7 KB
Loading

Diff for: scripts/spo-find-links-in-canvas/assets/sample.json

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
[
2+
{
3+
"name": "spo-find-links-in-canvas",
4+
"source": "pnp",
5+
"title": "Find Links in Modern Page",
6+
"shortDescription": "This script iterates through a list of site URLs provided in 'sites-test.csv'. For each site, it then scans these page contents for embedded href tags using regular expressions.",
7+
"url": "https://pnp.github.io/script-samples/spo-find-links-in-canvas/README.html",
8+
"longDescription": [
9+
""
10+
],
11+
"creationDateTime": "2024-12-03",
12+
"updateDateTime": "2024-12-03",
13+
"products": [
14+
"SharePoint"
15+
],
16+
"metadata": [
17+
{
18+
"key": "PNP-POWERSHELL",
19+
"value": "2.12.0"
20+
}
21+
],
22+
"categories": [
23+
"Report"
24+
],
25+
"tags": [
26+
"Connect-PnPOnline",
27+
"Get-PnPListItem",
28+
"Get-PnPWeb"
29+
],
30+
"thumbnails": [
31+
{
32+
"type": "image",
33+
"order": 100,
34+
"url": "https://raw.githubusercontent.com/pnp/script-samples/main/scripts/spo-find-links-in-canvas/assets/preview.png",
35+
"alt": "Preview of the sample Find Links in Modern Page"
36+
}
37+
],
38+
"authors": [
39+
{
40+
"gitHubAccount": "pkbullock",
41+
"company": "",
42+
"pictureUrl": "https://github.com/pkbullock.png",
43+
"name": "Paul Bullock"
44+
}
45+
],
46+
"references": [
47+
{
48+
"name": "Want to learn more about PnP PowerShell and the cmdlets",
49+
"description": "Check out the PnP PowerShell site to get started and for the reference to the cmdlets.",
50+
"url": "https://aka.ms/pnp/powershell"
51+
}
52+
]
53+
}
54+
]

Diff for: scripts/spo-find-links-in-canvas/assets/sites.csv

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
https://XXXX.sharepoint.com/sites/XXX
2+
https://XXXX.sharepoint.com/sites/XXX

Diff for: scripts/spo-find-links-in-canvas/assets/validate.ps1

+73
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# Scans the Site Pages library of every site listed in a CSV file and writes one
# pipe-delimited row (site title, page title, page url, anchor tag) to an output
# CSV for each anchor tag found in a page's canvas content.

# Each site in this list will have the script run against
$csv_SiteList = "sites-test.csv"
$csv_siteheaders = 'Url'

# Timestamp used in the output file name.
# .NET custom format specifiers are case-sensitive: MM = month, mm = minutes,
# HH = 24-hour clock, hh = 12-hour clock.
$date = Get-Date
$date = $date.ToString("yyyyMMddHHmmss")

# filename by using the date
$file_name = $date + 'LinkMatches.csv'

# Path to create the output file
$creation_path = Get-Location

# The site pages list that this script will run against
$List = "SitePages"

# Headers for the output csv
$headers = "Site Title|Page Title|Page Url|Href Tag"

# new line character
# Deliberately not stored in $ofs: PowerShell variable names are case-insensitive,
# so $ofs is the built-in $OFS output field separator used when arrays are
# converted to strings.
$newline = "`n"

# delimiter to use
$delim = '|'

# regex used to match the opening anchor tags (with a double-quoted href) that are
# embedded in the canvas page content; capture group 2 holds the href value
$regex ='<a\s+(?:[^>]*?\s+)?href=(["])(.*?)\1>'

# create object of all the sites (the csv has no header row, so one is supplied)
$sites = Import-Csv -Path $csv_SiteList -Header $csv_siteheaders

# variable for the header
$csv_outputheader = $headers + $newline

# complete file path
$csv_path = Join-Path $creation_path $file_name

# create output csv, seeded with the header row
New-Item -Path $creation_path -Name $file_name -ItemType File -Value $csv_outputheader

# iterate around each site from the csv
foreach($site in $sites)
{
    # make the connection, get some site information and create object that contains all the site pages
    # NOTE(review): Connect-PnPOnline appears to emit a connection object only when
    # -ReturnConnection is passed, so $connection may be $null here and the cmdlets
    # below would then fall back to the current connection - confirm against the
    # PnP.PowerShell version in use.
    $connection = Connect-PnPOnline -Url $site.Url -Interactive
    $pnpsite = Get-PnPWeb -Connection $connection
    $site_title = $pnpsite.Title
    $pages = (Get-PnPListItem -List $List -Fields "CanvasContent1", "Title" -Connection $connection).FieldValues

    # iterate around each page in the site to get the information from each page that will be used to build up the row and also conduct
    # the check to see if the canvas content has any href tags embedded
    foreach($page in $pages)
    {
        $page_title = $page.Get_Item("Title")
        $fileref = $page.Get_Item("FileRef")
        $canvascontent = $page.Get_Item("CanvasContent1")

        # check if the canvas has content
        if ($canvascontent.Length -gt 0)
        {
            # text of every anchor tag in the canvas that matches the href regular expression
            $hrefmatches = ($canvascontent | select-string -pattern $regex -AllMatches).Matches.Value

            # iterate around each regular expression match and write it out into the output csv that is pipe delimited
            foreach($hrefmatch in $hrefmatches)
            {
                $row = $site_title + $delim + $page_title + $delim + $fileref + $delim + $hrefmatch
                Add-Content -Path $csv_path -Value $row
            }
        }
    }
    Disconnect-PnPOnline
}

0 commit comments

Comments
 (0)