|
| 1 | +--- |
| 2 | +plugin: add-to-gallery |
| 3 | +--- |
| 4 | + |
| 5 | +# Find Links in Modern Page |
| 6 | + |
| 7 | +## Summary |
| 8 | + |
| 9 | +This script takes a CSV file containing the URLs of SharePoint sites and analyses each site's pages to see whether any of them contain hyperlinks. |
| 10 | +Every hyperlink found in a page is written as a row to a pipe-delimited CSV file. |
| 11 | + |
| 12 | +The script reads a list of SharePoint sites from a CSV file, connects to each site and extracts all pages within the specified list. For every page whose content contains an anchor tag, it captures the 'Title' field value (from the page metadata) along with any href tags present in its body text using regex matching, then writes these details into a new CSV file named after today’s date. |
| 13 | + |
| 14 | +Note: The last paragraph of the description above was generated using AI. |
| 15 | + |
| 16 | +# [PnP PowerShell](#tab/pnpps) |
| 17 | + |
| 18 | +```powershell |
| 19 | +
|
| 20 | + # CSV file listing the sites to process; the script runs against every site in it |
| 21 | +$csv_SiteList = "sites-test.csv" |
| 22 | +$csv_siteheaders = 'Url' |
| 23 | +
|
| 24 | +# Timestamp used in the output file name (MM = month, HH = 24-hour clock, mm = minutes, ss = seconds) |
| 25 | +$date = Get-Date |
| 26 | +$date = $date.ToString("yyyyMMddHHmmss") |
| 27 | +
|
| 28 | +# output file name, prefixed with the timestamp |
| 29 | +$file_name = $date + 'LinkMatches.csv' |
| 30 | +
|
| 31 | +# Folder in which the output file is created (the current location) |
| 32 | +$creation_path = Get-Location |
| 33 | +
|
| 34 | +# The site pages list that this script will run against |
| 35 | +$List = "SitePages" |
| 36 | +
|
| 37 | +# Headers for the output csv (pipe delimited, matching $delim below) |
| 38 | +$headers = "Site Title|Page Title|Page Url|Href Tag" |
| 39 | +
|
| 40 | +# new line character appended after the header row (note: $ofs is also PowerShell's output field separator preference variable) |
| 41 | +$ofs = "`n" |
| 42 | +
|
| 43 | +# delimiter to use between the columns of each output row |
| 44 | +$delim = '|' |
| 45 | +
|
| 46 | +# regex used to match the anchor (href) tags that are embedded in the canvas page content |
| 47 | +$regex ='<a\s+(?:[^>]*?\s+)?href=(["])(.*?)\1>' |
| 48 | +
|
| 49 | +# load the site list; the column name is supplied via -Header, so every line of the csv is read as a data row |
| 50 | +$sites = Import-Csv -Path $csv_SiteList -Header $csv_siteheaders |
| 51 | +
|
| 52 | +# header row followed by a new line, used as the initial content of the output file |
| 53 | +$csv_outputheader = $headers + $ofs |
| 54 | +
|
| 55 | +# complete path of the output file |
| 56 | +$csv_path = Join-Path $creation_path $file_name |
| 57 | +
|
| 58 | +# create the output csv containing only the header row |
| 59 | +New-Item -Path $creation_path -Name $file_name -ItemType File -Value $csv_outputheader |
| 60 | +
|
| 61 | +# iterate over each site from the csv |
| 62 | +foreach($site in $sites) |
| 63 | +{ |
| 64 | + # make the connection, get some site information and create an object that contains all the site pages (NOTE(review): Connect-PnPOnline is documented to return a connection object only with -ReturnConnection - confirm $connection is not $null here) |
| 65 | + $connection = Connect-PnPOnline -Url $site.Url -Interactive |
| 66 | + $pnpsite = Get-PnPWeb -Connection $connection |
| 67 | + $site_title = $pnpsite.Title |
| 68 | + $pages = (Get-PnPListItem -List $List -Fields "CanvasContent1", "Title" -Connection $connection).FieldValues |
| 69 | +
|
| 70 | + # iterate over each page in the site to get the information from each page that will be used to build up the row and also conduct |
| 71 | + # the check to see if the canvas content has any href tags embedded |
| 72 | + foreach($page in $pages) |
| 73 | + { |
| 74 | + $page_title = $page.Get_Item("Title") |
| 75 | + $fileref = $page.Get_Item("FileRef") |
| 76 | + $canvascontent = $page.Get_Item("CanvasContent1") |
| 77 | + # check if the canvas has content |
| 78 | + if ($canvascontent.Length -gt 0) |
| 79 | + { |
| 80 | + # matched text of every occurrence of the href regular expression in the canvas content |
| 81 | + $hrefmatches = ($canvascontent | select-string -pattern $regex -AllMatches).Matches.Value |
| 82 | +
|
| 83 | + # iterate over each regular expression match and append it to the output csv as a pipe delimited row |
| 84 | + foreach($hrefmatch in $hrefmatches) |
| 85 | + { |
| 86 | + $row = $site_title + $delim + $page_title + $delim + $fileref + $delim + $hrefmatch |
| 87 | + Add-Content -Path $csv_path -Value $row |
| 88 | + } |
| 89 | + } |
| 90 | + } |
| 91 | + Disconnect-PnPOnline |
| 92 | +} |
| 93 | +
|
| 94 | +``` |
| 95 | +[!INCLUDE [More about PnP PowerShell](../../docfx/includes/MORE-PNPPS.md)] |
| 96 | +*** |
| 97 | + |
| 98 | +## Source |
| 99 | + |
| 100 | +This script was first created on PnP PowerShell and transferred over in Dec 2024. Details of the original author are missing. Report if inaccurate. |
| 101 | +https://github.com/pnp/powershell |
| 102 | + |
| 103 | +## Contributors |
| 104 | + |
| 105 | +| Author(s) | |
| 106 | +|-----------| |
| 107 | +| Paul Bullock | |
| 108 | + |
| 109 | +[!INCLUDE [DISCLAIMER](../../docfx/includes/DISCLAIMER.md)] |
| 110 | +<img src="https://m365-visitor-stats.azurewebsites.net/script-samples/scripts/spo-find-links-in-canvas" aria-hidden="true" /> |
0 commit comments