diff --git a/apps/database-new/next-env.d.ts b/apps/database-new/next-env.d.ts
new file mode 100644
index 0000000000000..4f11a03dc6cc3
--- /dev/null
+++ b/apps/database-new/next-env.d.ts
@@ -0,0 +1,5 @@
+/// <reference types="next" />
+/// <reference types="next/image-types/global" />
+
+// NOTE: This file should not be edited
+// see https://nextjs.org/docs/basic-features/typescript for more information.
diff --git a/apps/docs/app/page.tsx b/apps/docs/app/page.tsx
index 35d0045cc6909..3f8009fb7a943 100644
--- a/apps/docs/app/page.tsx
+++ b/apps/docs/app/page.tsx
@@ -68,6 +68,13 @@ const products = [
description:
'Globally distributed, server-side functions to execute your code closest to your users for the lowest latency.',
},
+ {
+ title: 'Analytics',
+ icon: 'database',
+ hasLightIcon: true,
+ href: '/guides/warehouse',
+ description: 'Data analytics for ingesting and querying timeseries events.',
+ },
]
const migrationGuides = [
diff --git a/apps/docs/components/Feedback/Feedback.utils.ts b/apps/docs/components/Feedback/Feedback.utils.ts
index c1e93f10f2739..bdb668476575f 100644
--- a/apps/docs/components/Feedback/Feedback.utils.ts
+++ b/apps/docs/components/Feedback/Feedback.utils.ts
@@ -26,6 +26,8 @@ const getNotionTeam = (pathname: string) => {
return 'team-ai'
case 'cli':
return 'team-cli'
+ case 'warehouse':
+ return 'team-analytics'
// Ignoring platform for now because that section is a mix of teams.
case 'platform':
diff --git a/apps/docs/components/Navigation/Navigation.commands.tsx b/apps/docs/components/Navigation/Navigation.commands.tsx
index 77080e7fd300f..1ea9c16ee41af 100644
--- a/apps/docs/components/Navigation/Navigation.commands.tsx
+++ b/apps/docs/components/Navigation/Navigation.commands.tsx
@@ -40,6 +40,12 @@ const navCommands = [
route: '/guides/realtime',
icon: () => ,
},
+ {
+ id: 'nav-warehouse',
+ name: 'Go to Analytics',
+ route: '/guides/warehouse',
+ icon: () => ,
+ },
{
id: 'nav-ai',
name: 'Go to AI & Vectors',
diff --git a/apps/docs/components/Navigation/NavigationMenu/MenuIconPicker.tsx b/apps/docs/components/Navigation/NavigationMenu/MenuIconPicker.tsx
index acab4cf6f9745..8e9cd4b59aab1 100644
--- a/apps/docs/components/Navigation/NavigationMenu/MenuIconPicker.tsx
+++ b/apps/docs/components/Navigation/NavigationMenu/MenuIconPicker.tsx
@@ -49,6 +49,8 @@ function getMenuIcon(menuKey: string, width: number = 16, height: number = 16, c
return
case 'realtime':
return
+ case 'warehouse':
+ return
case 'storage':
return
case 'ai':
diff --git a/apps/docs/components/Navigation/NavigationMenu/NavigationMenu.constants.ts b/apps/docs/components/Navigation/NavigationMenu/NavigationMenu.constants.ts
index b8abddbb63542..118fca0796bb0 100644
--- a/apps/docs/components/Navigation/NavigationMenu/NavigationMenu.constants.ts
+++ b/apps/docs/components/Navigation/NavigationMenu/NavigationMenu.constants.ts
@@ -44,6 +44,12 @@ export const GLOBAL_MENU_ITEMS: GlobalMenuItems = [
href: '/guides/realtime',
level: 'realtime',
},
+ {
+ label: 'Analytics',
+ icon: 'warehouse',
+ href: '/guides/warehouse',
+ level: 'warehouse',
+ },
{
label: 'AI & Vectors',
icon: 'ai',
@@ -1389,6 +1395,26 @@ export const realtime: NavMenuConstant = {
],
}
+export const warehouse: NavMenuConstant = {
+ icon: 'warehouse',
+ title: 'Analytics',
+ url: '/guides/warehouse',
+ items: [
+ {
+ name: 'Overview & quickstart',
+ url: '/guides/warehouse',
+ },
+ {
+ name: 'Concepts',
+ url: undefined,
+ items: [
+ { name: 'Ingestion', url: '/guides/warehouse/ingestion' },
+ { name: 'BigQuery Backend', url: '/guides/warehouse/backends/bigquery' },
+ ],
+ },
+ ],
+}
+
export const storage: NavMenuConstant = {
icon: 'storage',
title: 'Storage',
diff --git a/apps/docs/components/Navigation/NavigationMenu/NavigationMenu.tsx b/apps/docs/components/Navigation/NavigationMenu/NavigationMenu.tsx
index 272443a349847..55873b09810f5 100644
--- a/apps/docs/components/Navigation/NavigationMenu/NavigationMenu.tsx
+++ b/apps/docs/components/Navigation/NavigationMenu/NavigationMenu.tsx
@@ -12,6 +12,7 @@ enum MenuId {
Auth = 'auth',
Functions = 'functions',
Realtime = 'realtime',
+ Warehouse = 'warehouse',
Storage = 'storage',
Ai = 'ai',
Platform = 'platform',
@@ -85,6 +86,10 @@ const menus: Menu[] = [
id: MenuId.Realtime,
type: 'guide',
},
+ {
+ id: MenuId.Warehouse,
+ type: 'guide',
+ },
{
id: MenuId.Storage,
type: 'guide',
diff --git a/apps/docs/components/Navigation/NavigationMenu/NavigationMenu.utils.ts b/apps/docs/components/Navigation/NavigationMenu/NavigationMenu.utils.ts
index 85934ed317ea8..fd106a8936385 100644
--- a/apps/docs/components/Navigation/NavigationMenu/NavigationMenu.utils.ts
+++ b/apps/docs/components/Navigation/NavigationMenu/NavigationMenu.utils.ts
@@ -123,6 +123,8 @@ export const getMenuId = (pathname: string | null) => {
return MenuId.Platform
case pathname.startsWith('realtime'):
return MenuId.Realtime
+ case pathname.startsWith('warehouse'):
+ return MenuId.Warehouse
case pathname.startsWith('resources'):
return MenuId.Resources
case pathname.startsWith('self-hosting'):
diff --git a/apps/docs/content/guides/getting-started/features.mdx b/apps/docs/content/guides/getting-started/features.mdx
index 6a982ddec9a9f..27bf2a2adbec5 100644
--- a/apps/docs/content/guides/getting-started/features.mdx
+++ b/apps/docs/content/guides/getting-started/features.mdx
@@ -164,6 +164,12 @@ Execute an Edge Function in a region close to your database. [Docs](/docs/guides
Edge functions natively support NPM modules and Node built-in APIs. [Link](https://supabase.com/blog/edge-functions-node-npm).
+## Analytics
+
+### Event Ingestion
+
+Ingest and query JSON timeseries data. [Docs](/docs/guides/warehouse/ingestion).
+
## Project management
### CLI
diff --git a/apps/docs/content/guides/platform.mdx b/apps/docs/content/guides/platform.mdx
index 6816244e44e60..97b2f6cd6fc93 100644
--- a/apps/docs/content/guides/platform.mdx
+++ b/apps/docs/content/guides/platform.mdx
@@ -19,6 +19,7 @@ Each project on Supabase comes with:
- [Edge Functions](/docs/guides/functions)
- [Realtime API](/docs/guides/realtime)
- [Storage](/docs/guides/storage)
+- [Analytics](/docs/guides/warehouse)
## Organizations
diff --git a/apps/docs/content/guides/warehouse.mdx b/apps/docs/content/guides/warehouse.mdx
new file mode 100644
index 0000000000000..684ac83ea2861
--- /dev/null
+++ b/apps/docs/content/guides/warehouse.mdx
@@ -0,0 +1,74 @@
+---
+id: 'Analytics'
+title: 'Event Analytics'
+description: 'Scalable data analytics'
+subtitle: 'Scalable data analytics for observability, metrics, and more.'
+hideToc: true
+---
+
+Supabase Analytics is an event ingestion and querying engine that stores, dispatches, and queries events from one or more databases.
+
+## Features
+
+### Scalable Storage and Querying Costs
+
+Columnar databases allow for fast analysis while providing compact storage. Costs scale predictably with the amount of data stored, giving users peace of mind when managing billing and infrastructure costs.
+
+Lucene-based event management systems worked well before the advent of scalable database options, but they become prohibitively expensive beyond a certain scale and volume, and the data must be shipped elsewhere for long-term analysis.
+
+Analytics connects to databases such as BigQuery to store massive volumes of data, while also providing tooling to abstract away the infrastructure intricacies of working with the underlying storage engine.
+
+### Bring Your Own Backends
+
+Analytics can integrate with your own backends, with Analytics managing the ingestion pipeline and maximizing event throughput. This provides maximum flexibility for storing sensitive data.
+
+Bringing your own backend gives Supabase customers complete control over storage and querying costs.
+
+### Schema Management
+
+When events are ingested, the backend's schema is automatically managed by Analytics, allowing you to insert JSON payloads without having to worry about data type changes.
+
+When new fields are sent to Analytics, the data type is detected automatically and merged into the current table schema.
+
+## Quickstart
+
+1. Create a collection
+
+ Head over to the [Logs & Analytics page](https://supabase.com/dashboard/project/_/logs/explorer).
+
+ Create a **New Collection** under the Analytics Events section.
+
+2. Retrieve ingestion access token
+
Retrieve your public ingestion access token by opening [Analytics Settings](https://supabase.com/dashboard/project/_/settings/warehouse) and clicking the copy button.
+
+3. Send an event
+
+ Execute this cURL command to send an event to Analytics.
+
Replace the `YOUR-COLLECTION-NAME-HERE` and `YOUR-ACCESS-TOKEN-HERE` placeholders with the values from steps 1 and 2.
+
+```bash
+# By collection name
+curl -X "POST" "https://api.warehouse.tech/api/events/json?collection_name=YOUR-COLLLECTION-NAME-HERE" \
+ -H 'Content-Type: application/json; charset=utf-8' \
+ -H 'Authorization: Bearer YOUR-ACCESS-TOKEN-HERE' \
+ -d $'[{
+ "message": "This is the main event message",
+ "metadata": {"some": "log event"}
+}]'
+
+
+# By collection UUID
+curl -X "POST" "https://api.warehouse.tech/api/events/json?collection=YOUR-COLLECTION-UUID-HERE" \
+ -H 'Content-Type: application/json; charset=utf-8' \
+ -H 'Authorization: Bearer YOUR-ACCESS-TOKEN-HERE' \
+ -d $'[{
+ "message": "This is the main event message",
+ "metadata": {"some": "log event"}
+}]'
+```
+
+4. Check the collection
+
You should see your new event pop up in the collection overview. You can then search and filter the collection for specific events using the filters, or use SQL to query the Analytics collections.
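+
+For example, a minimal query against your new collection might look like the following sketch. Replace `YOUR-COLLECTION-NAME-HERE` with your collection's name; note that queries must filter on the `timestamp` field, since collections are partitioned by day.
+
+```sql
+-- Fetch the most recent events from the last day (illustrative query)
+select id, timestamp, event_message
+from `YOUR-COLLECTION-NAME-HERE`
+where timestamp > timestamp_sub(current_timestamp(), interval 1 day)
+order by timestamp desc
+limit 10;
+```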
diff --git a/apps/docs/content/guides/warehouse/backends/bigquery.mdx b/apps/docs/content/guides/warehouse/backends/bigquery.mdx
new file mode 100644
index 0000000000000..367aef67d2325
--- /dev/null
+++ b/apps/docs/content/guides/warehouse/backends/bigquery.mdx
@@ -0,0 +1,73 @@
+---
+id: 'warehouse-bigquery'
+title: 'BigQuery'
+description: 'Learn how to use the BigQuery Backend'
+subtitle: 'Learn how to use the BigQuery Backend'
+sidebar_label: 'BigQuery'
+---
+
+Analytics natively supports storing events in BigQuery. Ingested events are **streamed** into BigQuery, and each collection is mapped to a BigQuery table.
+
+## Behavior and Configuration
+
+On table initialization, Analytics automatically applies configuration to ensure that tables are optimized.
+
+### Ingestion
+
+Ingested events are [streamed](https://cloud.google.com/bigquery/docs/streaming-data-into-bigquery) into BigQuery. This maximizes throughput, allowing Analytics to handle large volumes of events.
+
+### Partitioning and Retention
+
+All tables are partitioned on the `timestamp` field by **day**. This means that all queries against a BigQuery table must include a filter over the `timestamp` field.
+
+A collection's retention setting adjusts the BigQuery table's partition expiry, so that data is automatically deleted once its partition expires.
+
+For paid plans, if the retention is not set, the collection will default to **7 days** of retention on creation.
+
+For users on the Free plan, the maximum retention is **3 days**.
+
+#### Deep Dive: Table Partitioning
+
+Table partitioning effectively splits a BigQuery table into many smaller tables, one per partition.
+When a partition has not been modified for 90 consecutive days, BigQuery charges the discounted long-term storage rate for it, effectively half the normal price.
+
+Because Analytics partitions tables over time automatically, older and less frequently queried partitions benefit from this discount, reducing total effective storage costs.
+
+Furthermore, partitioning lets queries scan only the selected partitions rather than the entire table, making your queries more responsive by scanning less data.
+
+When querying against the streaming buffer, the number of bytes scanned is always zero, allowing near zero-cost queries. Should you need to query the streaming buffer directly, you can use the following query ([source](https://stackoverflow.com/questions/41864257/how-to-query-for-data-in-streaming-buffer-only-in-bigquery)):
+
+```sql
+select fields from `my_collection` where _PARTITIONTIME is null;
+```
+
+You can read more about partitioned tables in the official Google Cloud [documentation](https://cloud.google.com/bigquery/docs/partitioned-tables).
+
+## Querying
+
+When querying your collections, use BigQuery SQL syntax and refer to each collection by name. Analytics automatically parses the query and maps it to the correct dataset and table name. You can also perform joins across multiple collections, as in the example below.
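+
+For instance, a hypothetical join between two collections named `app_events` and `edge_events`, correlated on an illustrative top-level `request_id` field, could look like this (collection and field names are placeholders):
+
+```sql
+-- Join two collections on a shared identifier field (illustrative names)
+select a.timestamp, a.event_message as app_message, e.event_message as edge_message
+from
+  `app_events` as a
+  join `edge_events` as e on a.request_id = e.request_id
+where DATE(a.timestamp) = "2024-02-15" and DATE(e.timestamp) = "2024-02-15"
+order by a.timestamp desc
+limit 10;
+```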
+
+### Unnesting Repeated Records
+
+Nested columns are represented as repeated `RECORD`s in BigQuery. To query inside a nested record, you must `UNNEST` it, like so:
+
+```sql
+select timestamp, req.url, h.cf_cache_status
+from
+ `your_collection` as t
+ cross join UNNEST(t.metadata) as m
+ cross join UNNEST(m.request) as req
+ cross join UNNEST(m.response) as resp
+ cross join UNNEST(resp.headers) as h
+where DATE(timestamp) = "2019-05-09"
+order by timestamp desc
+limit 10;
+```
+
+### Query Result Limit
+
+There is a 1000 row result limit for each query run.
+
+### SELECT only
+
+Analytics only allows `SELECT` queries to be run. DDL statements will be blocked and will result in an error.
diff --git a/apps/docs/content/guides/warehouse/ingestion.mdx b/apps/docs/content/guides/warehouse/ingestion.mdx
new file mode 100644
index 0000000000000..f8df49fcde604
--- /dev/null
+++ b/apps/docs/content/guides/warehouse/ingestion.mdx
@@ -0,0 +1,293 @@
+---
+id: 'warehouse-ingestion'
+title: 'Event Ingestion'
+description: 'Learn how to use Supabase to ingest analytics events'
+subtitle: 'Learn how to use Supabase to ingest analytics events'
+sidebar_label: 'Ingestion'
+---
+
+Events are ingested through the ingestion API, where JSON payloads are processed and inserted into the configured backend.
+
+These processed payloads are inserted into collections. A **collection** is made up of many **events**.
+
+If no additional backend is configured and attached to a collection, Analytics will insert into the default Supabase-managed BigQuery backend. This applies to all users, regardless of plan.
+
+## API Endpoints
+
+There are two ways to ingest data: by specifying the collection UUID, or by specifying the collection's name.
+
+```
+POST https://api.warehouse.tech/api/events?collection=9dd9a6f6-8e9b-4fa4-b682-4f2f5cd99da3
+
+POST https://api.warehouse.tech/api/events?collection_name=my.event.collection
+```
+
+
+When ingesting by name, ensure that the collection's name is unique; otherwise, ingestion will fail with an error and the event will be discarded.
+
+
+
+## Sending Events
+
+Events may have one or more keys, but will always include the following standard top-level keys:
+
+| Field | Description |
+| --------------- | ----------------------------------------------------- |
+| `id` | An auto-generated UUID |
+| `timestamp` | An auto-generated timestamp. Can be provided by user. |
+| `event_message` | An event message. Aliased with the `message` field. |
+
+You can ingest events individually or in batches. We recommend batching your events for faster processing.
+
+To ingest individual events, send a single object in the POST body:
+
+```json
+{
+ "message": "your log event message",
+ "metadata": {...}
+}
+```
+
+To batch ingest, you can send the events as a JSON array:
+
+```json
+[
+ {"message": "your event message 1", "metadata": {...}},
+ {"message": "your event message 2", "metadata": {...}},
+ ...
+]
+```
+
+You can also place the array on the `batch` key of the POST body:
+
+```json
+{
+ "batch": [
+ {"message": "your event message 1", "metadata": {...}},
+ {"message": "your event message 2", "metadata": {...}},
+ ...
+ ]
+}
+```
+
+## Adaptive Schema
+
+As your application needs change, Analytics detects schema changes and adjusts the underlying database schema accordingly. This adaptive schema allows you to focus on analyzing your events instead of managing your ingestion pipeline manually.
+
+Suppose your initial log events had the following shape:
+
+```json
+{
+ "message": "This is my event",
+ "metadata": { "my": "first event" }
+}
+```
+
+The generated schema would be the following:
+
+```
+id: uuid;
+timestamp: timestamp;
+event_message: string;
+metadata: {
+ my: string;
+}
+```
+
+As your application requirements change, you can add new fields to your events:
+
+```json
+{
+ "message": "This is my new log event",
+ "metadata": {
+ "my": "first log",
+ "counter": 123
+ }
+}
+```
+
+Analytics will then detect the schema change in the event and add the new column to the collection's underlying backend.
+
+```
+event_message: string;
+metadata: {
+ my: string;
+ counter: number;
+}
+```
+
+
+
+At high ingestion volumes, Analytics samples incoming events instead of checking each one. The sample ratio decreases as the ingestion rate increases. The ingestion rate is measured per individual server performing the ingestion, not globally.
+
+| Ingestion Rate (per second) | Sample Ratio |
+| --------------------------- | ------------ |
+| < 10 | 1.0 |
+| 10-100 | 0.2 |
+| 100-500 | 0.1 |
+| 500-1000 | 0.05 |
+| > 1000 | 0.01 |
+
+
+
+## Schema
+
+Analytics maintains a schema to perform automatic migrations on the backend. The typing of each field is **strict** and cannot be changed once the field is created. For example, converting a string column to a number column is not possible.
+
+Always plan your event schema changes to avoid typing errors on ingestion. Events that do not adhere to the types required by the backend will be rejected with an ingestion error and discarded, as in the example below.
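+
+For example, if `metadata.count` was first ingested as a string (creating a string column), a later event like the hypothetical one below would be rejected, because it sends a number for the same field:
+
+```json
+{
+  "message": "later event",
+  "metadata": { "count": 2 }
+}
+```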
+
+### Key Transformation
+
+When ingesting, object keys are transformed automatically to comply with the backend in use. For example, BigQuery requires that column names contain only letters (a-z, A-Z), numbers (0-9), or underscores (\_), and start with a letter or underscore. Analytics handles this automatically on ingestion by removing or replacing invalid characters.
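+
+As a purely illustrative example, a payload with a key that is not a valid BigQuery column name, such as the hypothetical one below, would have that key renamed to a backend-safe equivalent (for instance, something like `user_name`), with the exact replacement rules handled by Analytics:
+
+```json
+{
+  "message": "user signed in",
+  "metadata": { "user-name": "some_user" }
+}
+```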
+
+## Additive Changes Only
+
+Schema updates are additive; existing columns cannot be removed. If a new schema is desired, we recommend creating a new collection and migrating any existing data manually.
+
+## Ingestion Access Tokens
+
+Ingest access tokens are used for ingesting data only. These keys are meant to be used publicly in client-side code. You can manage them in [Analytics Settings](https://supabase.com/dashboard/project/_/settings/warehouse).
+
+
+
+Ingest access tokens are different from [personal access tokens](https://supabase.com/dashboard/account/tokens). Personal access tokens are used to manage your Supabase account, whereas ingest access tokens are only used for ingesting data into Analytics.
+
+
+
+We recommend rolling ingest access tokens frequently, as client-side code can be reverse engineered and malicious actors may abuse exposed tokens.
+
+To roll ingest access tokens:
+
+1. Create a new ingest access token in [Analytics Settings](https://supabase.com/dashboard/project/_/settings/warehouse).
+2. Deploy code with the new ingest access token.
+3. Revoke the old ingest access token in [Analytics Settings](https://supabase.com/dashboard/project/_/settings/warehouse).
+
+### Authentication
+
+There are 3 supported methods for attaching an ingest access token to ingestion API requests, as shown in the examples after this list:
+
+1. Using the `Authorization` header, with the format `Authorization: Bearer your-access-token-here`
+2. Using the `X-API-KEY` header, with the format `X-API-KEY: your-access-token-here`
+3. Using the `api_key` query parameter, with the format `?api_key=your-access-token-here`
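+
+For example, using the JSON ingestion endpoint from the quickstart, the three methods look like this (collection name and token are placeholders):
+
+```bash
+# 1. Authorization header
+curl -X "POST" "https://api.warehouse.tech/api/events/json?collection_name=YOUR-COLLECTION-NAME-HERE" \
+  -H 'Content-Type: application/json' \
+  -H 'Authorization: Bearer YOUR-ACCESS-TOKEN-HERE' \
+  -d '[{"message": "hello"}]'
+
+# 2. X-API-KEY header
+curl -X "POST" "https://api.warehouse.tech/api/events/json?collection_name=YOUR-COLLECTION-NAME-HERE" \
+  -H 'Content-Type: application/json' \
+  -H 'X-API-KEY: YOUR-ACCESS-TOKEN-HERE' \
+  -d '[{"message": "hello"}]'
+
+# 3. api_key query parameter
+curl -X "POST" "https://api.warehouse.tech/api/events/json?collection_name=YOUR-COLLECTION-NAME-HERE&api_key=YOUR-ACCESS-TOKEN-HERE" \
+  -H 'Content-Type: application/json' \
+  -d '[{"message": "hello"}]'
+```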
+
+## Ingestion Parsers and Configurations
+
+Analytics provides ingestion parsers for different event formats and processing needs. These out-of-the-box processors auto-generate certain fields or reformat the payload for storage.
+
+### Generic JSON
+
+The generic JSON parser stringifies the event payload into the `event_message` field for ease of visual scanning.
+
+```
+POST https://api.warehouse.tech/api/events/json?source=f6cccd3a-c42e-40f7-9b01-95d3699d3113
+Content-Type: application/json
+Authorization: Bearer XXXXX
+
+{
+ "status": "started"
+}
+```
+
+The corresponding event will be processed and ingested, where ellipses represent auto-generated fields:
+
+```json
+{
+ "status": "started",
+ "event_message": "{\"status\":\"started\"}",
+ "timestamp": "...",
+ "id": "..."
+}
+```
+
+### GitHub Webhooks
+
+To ingest GitHub webhook requests, use the `/api/events/github` route:
+
+```
+https://api.warehouse.tech/api/events/github?api_key=XXXXX&collection=YOUR-COLLECTION-UUID
+```
+
+This parser drops all keys ending in `_url`, keeping your GitHub payloads in check and avoiding schema bloat, as in the example below.
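+
+For instance, a truncated, hypothetical webhook payload like the following:
+
+```json
+{
+  "action": "opened",
+  "repository": { "full_name": "org/repo", "html_url": "https://github.com/org/repo" }
+}
+```
+
+would be stored without the `html_url` key, keeping only `action` and `repository.full_name`.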
+
+### Heroku and Logplex
+
+The Logplex parser will parse syslog events using the Heroku dialect.
+
+To add Analytics as a Heroku drain:
+
+```
+heroku drains:add "https://api.warehouse.tech/api/events/logplex?api_key=XXXXX&collection=YOUR-COLLECTION-UUID"
+```
+
+### Cloud Event
+
+The Cloud Event parser supports the CloudEvents spec, version [v1.0.2](https://github.com/cloudevents/spec/blob/v1.0.2/cloudevents/spec.md).
+
+#### Example
+
+```
+POST https://api.warehouse.tech/api/events/cloud-event?source=f6cccd3a-c42e-40f7-9b01-95d3699d3113
+Content-Type: application/json
+X-API-KEY: XXXXX
+CE-specversion: 1.0
+CE-id: 01HPPC9X0HPKB8E1RSPA5YFZB2
+CE-source: flyd
+CE-type: io.fly.machine.start
+CE-time: 2024-02-15T12:36:45+00:00
+
+{
+ "body": {},
+ "machine_id": "148ed193b95948",
+ "status": "started"
+}
+```
+
+The corresponding event will be processed and ingested, where ellipses represent auto-generated fields:
+
+```json
+{
+ "cloud_event": {
+ "specversion": "1.0",
+ "id": "01HPPC9X0HPKB8E1RSPA5YFZB2",
+ "source": "flyd",
+ "type": "io.fly.machine.start",
+ "time": "2024-02-15T12:36:45+00:00"
+ },
+ "machine_id": "148ed193b95948",
+ "status": "started",
+ "event_message": "...",
+ "timestamp": "...",
+ "id": "..."
+}
+```
+
+## Best Practices
+
+### Stringify Keys to Avoid Bloated Schemas
+
+Always avoid logging large objects, as each object key (and all nested keys) will result in a new field created in the schema.
+
+To capture all contextual data, consider stringifying certain objects.
+
+For example, given a payload like the following:
+
+```json
+{ "some": { "very": "nested", "value": 123 } }
+```
+
+You can instead stringify the `some` key and set it to `some_str`:
+
+```json
+{ "some_str": "...." }
+```
+
+And you would then be able to query the field as a JSON string using BigQuery [JSON functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions):
+
+```sql
+select JSON_EXTRACT(t.some_str, "$") as some from `my.collection` as t;
+```
+
+### Always Default to Using Floats
+
+Always default to floats for numeric values, as certain backends, such as BigQuery, treat floats and integers as distinct types. Explicitly cast numbers to floats on the client where possible, as in the example below.
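+
+For example, if a metric may eventually hold fractional values, send it as a float from the start (a hypothetical payload):
+
+```json
+{
+  "message": "purchase completed",
+  "metadata": { "amount": 42.0 }
+}
+```
+
+If `amount` were first ingested as the integer `42`, the column would be created as an integer, and later fractional values would conflict with the strict typing described above.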
diff --git a/apps/docs/features/docs/GuidesMdx.utils.tsx b/apps/docs/features/docs/GuidesMdx.utils.tsx
index 49a2bf2c7fa9f..53d0cdcf29f1b 100644
--- a/apps/docs/features/docs/GuidesMdx.utils.tsx
+++ b/apps/docs/features/docs/GuidesMdx.utils.tsx
@@ -33,6 +33,7 @@ const PUBLISHED_SECTIONS = [
'resources',
'self-hosting',
'storage',
+ 'warehouse',
] as const
const getGuidesMarkdownInternal = async ({ slug }: { slug: string[] }) => {