Skip to content

Commit

Permalink
Add support for oEmbed
Browse files Browse the repository at this point in the history
  • Loading branch information
Patrick Burrows committed Oct 13, 2024
1 parent b2a0a7a commit 12f3c34
Show file tree
Hide file tree
Showing 4 changed files with 356 additions and 19 deletions.
51 changes: 49 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,58 @@ This library just parses the Html for OpenGraph and Twitter Card `<meta />` tags

Unfurl.Net is a simple library that depends on the venerable [HtmlAgilityPack](https://github.com/zzzprojects/html-agility-pack) for its parsing.

## Usage

Basic usage that loads open graph and X/Twitter cards:

```csharp
var unfurler = new Unfurler();
var url = "https://developer.x.com/en/docs/x-for-websites/cards/overview/markup";
var results = await unfurler.Unfurl(url);
```

Include oEmbed data:

```csharp
var httpClient = new HttpClient(); // <-- in production, reuse one shared HttpClient (or IHttpClientFactory) instead of creating a new one per call
var unfurler = new Unfurler();
var url = "https://www.youtube.com/watch?v=5EI0OP7o8cM";
var results = await unfurler.Unfurl(url, new UnfurlOptions()
{
LoadOEmbed = true,
OEmbedHttpClient = httpClient,
});
```

Handle redirects and shortened links:

```csharp
var unfurler = new Unfurler();
var url = "https://www.youtube.com/watch?v=5EI0OP7o8cM";
var results = await unfurler.Unfurl(url, new UnfurlOptions()
{
MaximumRedirects = 2,
});
```

All Options:

| Property | Description |
|-------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| Encoding | Sets the encoding of the target URL. Defaults to null. |
| Credentials | pass in custom network credentials if needed. |
| CancellationToken | control cancellation of the network requests |
| MaximumRedirects | the number of redirects to follow from 301 or 302 responses. |
| UserAgent | the user-agent string to pass as part of the request headers. Defaults to "Unfurl.Net/1.1" |
| LoadOEmbed | whether or not to make the extra web-request to load any oEmbed content |
| OEmbedHttpClient | Instance of [HttpClient](https://learn.microsoft.com/en-us/dotnet/api/system.net.http.httpclient?view=net-8.0) to use for querying oEmbed. If null, a new HttpClient instance is created, which has performance implications for high-volume services. |



## Roadmap

The current features are sufficient for my needs. But if there are any additional features you would like to see, please open an issue (or even submit a PR if you want) and I will work on adding them.

Here are some features that could be added:

* Accept Html as a string or stream for parsing, and not just a Url that has to be downloaded. This will allow alternate methods of downloading Urls and Unfurl.Net can be used simply as the parser, not also as the Html downloader.
* Work with oEmbed data. oEmbed requires looking for the oEmbed tag (which unfurl.net does now) and then downloading the Json or Xml file referenced in that tag. At this point, I hadn't come across a lot of oEmbed tags, so I didn't implement that feature. But if this changes in the future, I will add that (or, again, feel free to request this feature in an issue.)
* Accept Html as a string or stream for parsing, and not just a Url that has to be downloaded. This will allow alternate methods of downloading Urls and Unfurl.Net can be used simply as the parser, not also as the Html downloader.
71 changes: 64 additions & 7 deletions Unfurl.Net.Tests/Tests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ public class Tests
public async Task CanLoadATwitterCard()
{
var unfurler = new Unfurler();
var url = "https://developer.twitter.com/en/docs/twitter-for-websites/cards/overview/markup";
var url = "https://developer.x.com/en/docs/x-for-websites/cards/overview/markup";
var results = await unfurler.Unfurl(url);

results.Url.Should().Be(url);
Expand All @@ -23,19 +23,17 @@ public async Task CanParseAYouTubeLink()
{
var unfurler = new Unfurler();
var url = "https://www.youtube.com/watch?v=Unzc731iCUY";
var results = await unfurler.Unfurl(url);
var results = await unfurler.Unfurl(url, new() {LoadOEmbed = true});

results.Url.Should().Be(url);
results.FavIcon.Should().Contain("favicon_32x32.png");
results.CanonicalUrl.Should().Be("https://www.youtube.com/watch?v=Unzc731iCUY");
results.Description.Should()
.Be(
"MIT How to Speak, IAP 2018Instructor: Patrick WinstonView the complete course: https://ocw.mit.edu/how_to_speakPatrick Winston&#39;s How to Speak talk has been a...");
results.Keywords.Count.Should().Be(1);
results.Keywords.Count.Should().BeGreaterThan(1);
results.Keywords[0].Should().Be("Aloud");
results.OEmbedLink.Should()
.Be(
"https://www.youtube.com/oembed?format=json&amp;url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DUnzc731iCUY");

results.XTwitter.Card.Should().Be("player");
results.XTwitter.Image.Should().Be("https://i.ytimg.com/vi/Unzc731iCUY/maxresdefault.jpg");
results.XTwitter.Title.Should().Be("How to Speak");
Expand All @@ -50,6 +48,14 @@ public async Task CanParseAYouTubeLink()
.Be(
"MIT How to Speak, IAP 2018Instructor: Patrick WinstonView the complete course: https://ocw.mit.edu/how_to_speakPatrick Winston&#39;s How to Speak talk has been a...");
results.OpenGraph.Type.Should().Be("video.other");


results.OEmbed.Should().NotBeNull();
results.OEmbed!.Title.Should().StartWith("How to Speak");
results.OEmbed!.Type.Should().Be(OEmbedTypes.Video);
var video = results.OEmbed as OEmbedVideo;
video.Should().NotBeNull();
video!.Html.Should().StartWith("<");
}

[Fact]
Expand All @@ -70,6 +76,57 @@ public async Task CanParseSpotifyPlaylist()
var results = await unfurler.Unfurl(url);

results.Url.Should().Be(url);
results.OpenGraph.Title.Should().Be("Alternative Hip-Hop");
results.OpenGraph.Should().NotBeNull();
results.OpenGraph!.Title.Should().Be("Alternative Hip-Hop");
}

/// <summary>
/// Unfurls a Microsoft Forms short link with oEmbed loading enabled and a
/// caller-supplied <see cref="HttpClient"/>, then checks that an oEmbed
/// payload was attached to the result.
/// </summary>
[Fact]
public async Task CanParseMsFormOembed()
{
    // Disposed when the test method exits so the handler and its sockets are released.
    using var httpClient = new HttpClient();
    var unfurler = new Unfurler();
    var url = "https://forms.office.com/r/YLPA60FDtJ";

    var results = await unfurler.Unfurl(url, new UnfurlOptions()
    {
        LoadOEmbed = true,
        // The /r/ link is a short link, so redirects must be followed to reach the form page.
        MaximumRedirects = 2,
        OEmbedHttpClient = httpClient,
    });

    results.OEmbed.Should().NotBeNull();
}

/// <summary>
/// Unfurls a youtu.be share link with redirects and oEmbed loading enabled,
/// and verifies both the OpenGraph title and the video oEmbed payload.
/// </summary>
[Fact]
public async Task CanParseYouTubeShareLink()
{
    const string shareLink = "https://youtu.be/5EI0OP7o8cM?si=Iu1qnqk8aXrkc_Bi";
    const string expectedTitle = "If Beethoven Were A METAL Bassist...";
    var options = new UnfurlOptions
    {
        MaximumRedirects = 2,
        LoadOEmbed = true
    };

    var unfurled = await new Unfurler().Unfurl(shareLink, options);

    // OpenGraph metadata parsed from the page.
    var openGraph = unfurled.OpenGraph;
    openGraph.Should().NotBeNull();
    openGraph!.Title.Should().Be(expectedTitle);

    // oEmbed payload fetched because LoadOEmbed was set.
    var embed = unfurled.OEmbed;
    embed.Should().NotBeNull();
    embed!.Title.Should().Be(expectedTitle);
    embed.Type.Should().Be(OEmbedTypes.Video);

    // A "video" payload is expected to surface as OEmbedVideo with embeddable markup.
    var asVideo = embed as OEmbedVideo;
    asVideo.Should().NotBeNull();
    asVideo!.Html.Should().StartWith("<");
}

/// <summary>
/// Unfurls a youtu.be share link with <c>LoadOEmbed</c> switched off and
/// verifies that OpenGraph data is still parsed while no oEmbed payload is attached.
/// </summary>
[Fact]
public async Task DoesntParseOembedWhenToldNotTo()
{
    const string shareLink = "https://youtu.be/5EI0OP7o8cM?si=Iu1qnqk8aXrkc_Bi";
    var options = new UnfurlOptions()
    {
        LoadOEmbed = false,
        MaximumRedirects = 2
    };

    var unfurled = await new Unfurler().Unfurl(shareLink, options);

    var openGraph = unfurled.OpenGraph;
    openGraph.Should().NotBeNull();
    openGraph!.Title.Should().Be("If Beethoven Were A METAL Bassist...");

    // oEmbed loading was disabled, so nothing should have been populated.
    unfurled.OEmbed.Should().BeNull();
}
}
57 changes: 53 additions & 4 deletions Unfurl.Net/Models.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Threading;

Expand All @@ -9,12 +10,16 @@ public class UnfurlOptions
{
public Encoding? Encoding { get; set; }
public NetworkCredential? Credentials { get; set; }
public string? Username { get; set; }
public string? Password { get; set; }
public CancellationToken? CancellationToken { get; set; }
public int? MaximumRedirects { get; set; }
public string? UserAgent { get; set; }
public bool LoadOEmbed { get; set; } = false;

/// <summary>
/// HttpClient to use for querying OEmbed content.
/// If null, will create a new instance of HttpClient which has performance implications for high-volume services.
/// </summary>
public HttpClient? OEmbedHttpClient { get; set; }
}

public class UnfurlResult
Expand All @@ -27,7 +32,7 @@ public class UnfurlResult
public string? Author { get; set; }
public string? ThemeColor { get; set; }
public string? CanonicalUrl { get; set; }
public OEmbedDetails? OEmbed { get; set; }
public OEmbedBase? OEmbed { get; set; }
public XTwitterDetails? XTwitter { get; set; }
public OpenGraphDetails? OpenGraph { get; set; }
public string? Encoding { get; set; }
Expand All @@ -36,8 +41,52 @@ public class UnfurlResult
public string? OEmbedLink { get; set; }
}

public class OEmbedDetails
/// <summary>
/// Properties shared by every oEmbed payload. This is the declared type of
/// <c>UnfurlResult.OEmbed</c>; the more specific payload classes derive from it.
/// </summary>
public class OEmbedBase
{
    /// <summary>
    /// The payload type. Compared against the <see cref="OEmbedTypes"/> constants.
    /// Initialized with <c>null!</c>, like <see cref="Version"/>, so the non-nullable
    /// declaration does not raise a nullable warning; the value is expected to be
    /// assigned after construction.
    /// </summary>
    public string Type { get; set; } = null!;

    /// <summary>The version string reported by the payload.</summary>
    public string Version { get; set; } = null!;

    /// <summary>Optional title of the embedded resource.</summary>
    public string? Title { get; set; }

    /// <summary>Optional name of the resource's author.</summary>
    public string? AuthorName { get; set; }

    /// <summary>Optional URL for the resource's author.</summary>
    public string? AuthorUrl { get; set; }

    /// <summary>Optional name of the provider serving the resource.</summary>
    public string? ProviderName { get; set; }

    /// <summary>Optional URL of the provider serving the resource.</summary>
    public string? ProviderUrl { get; set; }

    /// <summary>Optional cache age (presumably seconds, per the oEmbed spec; confirm against the parser).</summary>
    public int? CacheAge { get; set; }

    /// <summary>Optional URL of a thumbnail image.</summary>
    public string? ThumbnailUrl { get; set; }

    /// <summary>Optional thumbnail width.</summary>
    public int? ThumbnailWidth { get; set; }

    /// <summary>Optional thumbnail height.</summary>
    public int? ThumbnailHeight { get; set; }
}

/// <summary>
/// oEmbed payload that adds an image <see cref="Url"/> and its dimensions to the
/// shared <see cref="OEmbedBase"/> fields.
/// NOTE(review): presumably produced for <c>OEmbedTypes.Photo</c> responses; confirm against the parser.
/// </summary>
public class OEmbedPhoto : OEmbedBase
{
/// <summary>Image URL. Declared non-nullable; <c>null!</c> only silences the nullable warning until a value is assigned.</summary>
public string Url { get; set; } = null!;
/// <summary>Image width.</summary>
public int Width { get; set; }
/// <summary>Image height.</summary>
public int Height { get; set; }
}

/// <summary>
/// oEmbed payload that adds embeddable <see cref="Html"/> markup and its dimensions
/// to the shared <see cref="OEmbedBase"/> fields. The tests in this change expect a
/// result whose <c>Type</c> equals <c>OEmbedTypes.Video</c> to be castable to this class.
/// </summary>
public class OEmbedVideo : OEmbedBase
{
/// <summary>Embed markup. Declared non-nullable; <c>null!</c> only silences the nullable warning until a value is assigned.</summary>
public string Html { get; set; } = null!;
/// <summary>Embed width.</summary>
public int Width { get; set; }
/// <summary>Embed height.</summary>
public int Height { get; set; }
}

/// <summary>
/// oEmbed payload that carries only the shared <see cref="OEmbedBase"/> fields.
/// NOTE(review): presumably produced for <c>OEmbedTypes.Link</c> responses; confirm against the parser.
/// </summary>
public class OEmbedLink : OEmbedBase
{
}

/// <summary>
/// oEmbed payload that adds embeddable <see cref="Html"/> markup and its dimensions
/// to the shared <see cref="OEmbedBase"/> fields.
/// NOTE(review): presumably produced for <c>OEmbedTypes.Rich</c> responses; confirm against the parser.
/// </summary>
public class OEmbedRich : OEmbedBase
{
/// <summary>Embed markup. Declared non-nullable; <c>null!</c> only silences the nullable warning until a value is assigned.</summary>
public string Html { get; set; } = null!;
/// <summary>Embed width.</summary>
public int Width { get; set; }
/// <summary>Embed height.</summary>
public int Height { get; set; }
}

/// <summary>
/// String constants for the values that <c>OEmbedBase.Type</c> is compared against.
/// NOTE(review): this is a struct used purely as a constants holder; a static class
/// would be more conventional, but changing it would alter the public surface.
/// </summary>
public struct OEmbedTypes
{
/// <summary>Type value "photo".</summary>
public const string Photo = "photo";
/// <summary>Type value "video".</summary>
public const string Video = "video";
/// <summary>Type value "link".</summary>
public const string Link = "link";
/// <summary>Type value "rich".</summary>
public const string Rich = "rich";
}

public class XTwitterDetails
Expand Down
Loading

0 comments on commit 12f3c34

Please sign in to comment.