From 533d0b70b82f8c9179defa4c91bf48541e26f1bc Mon Sep 17 00:00:00 2001 From: Martin Tournoij Date: Thu, 11 Apr 2024 10:30:05 +0100 Subject: [PATCH] Remove "salt" from sessions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This doesn't really add anything, not really. The way this has worked for a long time (since 2020, a1aa1c5) is: 1. A sessionHash is created as hash(salt + User-Agent + IP). 2. Store this in memory as a sessionHash→UUIDv4 map. 3. Use this UUIDv4 in the database and such. 4. The salt changes every few hours, resulting in new hashes and new UUIDs. So it never hits the disk, and the only thing the salt really adds is rotation of the sessionHashes. This is different from before, where the session ID was stored directly, but that was a long time ago and in the very early versions only. So now it just expires the sessionHash after a few hours. --- README.md | 2 +- cmd/goatcounter/help.go | 3 +- cron/size_stat_test.go | 24 +++--- cron/tasks.go | 1 - docs/sessions.md | 164 --------------------------------------- handlers/count_test.go | 34 ++------ handlers/http_test.go | 2 +- hit_stats_test.go | 24 +++--- memstore.go | 151 ++++++++++++----------------------- settings.go | 4 +- tpl.go | 3 +- tpl/help/sessions.md | 61 +++++++++++++++ tpl/home.gohtml | 3 +- tpl/settings_main.gohtml | 2 +- 14 files changed, 150 insertions(+), 328 deletions(-) create mode 100644 tpl/help/sessions.md diff --git a/README.md b/README.md index 3f5cd2648..ce6727ae9 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ Features [privacy]: https://www.goatcounter.com/privacy [gdpr]: https://www.goatcounter.com/gdpr -[sessions]: https://github.com/arp242/goatcounter/blob/master/docs/sessions.md +[sessions]: https://www.goatcounter.com/help/sessions Getting data in to GoatCounter diff --git a/cmd/goatcounter/help.go b/cmd/goatcounter/help.go index 045da72d8..4f60f36d7 100644 --- a/cmd/goatcounter/help.go +++ b/cmd/goatcounter/help.go @@ 
-237,6 +237,7 @@ commas. all Show debug logs for all of the below. acme ACME certificate creation. + cli-trace Show stack traces in errors on the CLI. cron Background "cron" jobs. cron-acme Cron jobs for ACME certificate creations. dashboard Dashboard view. @@ -245,7 +246,7 @@ commas. import-api Imports from the API. memstore Storing of pageviews in the database. monitor Additional logs in "goatcounter monitor" . - cli-trace Show stack traces in errors on the CLI. + session Internal "session" generation to track visitors . startup Some additional logs during startup. vacuum Deletion of old deleted sites and old pageviews. ` diff --git a/cron/size_stat_test.go b/cron/size_stat_test.go index 3dd5387ed..fc1b6c31c 100644 --- a/cron/size_stat_test.go +++ b/cron/size_stat_test.go @@ -39,12 +39,12 @@ func TestSizeStats(t *testing.T) { want := `{ "more": false, "stats": [ - {"count": 0, "id": "phone", "name": "Phones"}, - {"count": 0, "id": "largephone", "name": "Large phones, small tablets" }, - {"count": 0, "id": "tablet", "name": "Tablets and small laptops"}, - {"count": 2, "id": "desktop", "name": "Computer monitors"}, - {"count": 0, "id": "desktophd", "name": "Computer monitors larger than HD"}, - {"count": 0, "id": "unknown", "name": "(unknown)"} + {"count": 0, "id": "phone", "name": ""}, + {"count": 0, "id": "largephone", "name": "" }, + {"count": 0, "id": "tablet", "name": ""}, + {"count": 2, "id": "desktop", "name": ""}, + {"count": 0, "id": "desktophd", "name": ""}, + {"count": 0, "id": "unknown", "name": ""} ] }` @@ -71,12 +71,12 @@ func TestSizeStats(t *testing.T) { want = `{ "more": false, "stats": [ - {"count": 0, "id": "phone", "name": "Phones"}, - {"count": 0, "id": "largephone", "name": "Large phones, small tablets" }, - {"count": 0, "id": "tablet", "name": "Tablets and small laptops"}, - {"count": 3, "id": "desktop", "name": "Computer monitors"}, - {"count": 0, "id": "desktophd", "name": "Computer monitors larger than HD"}, - {"count": 1, "id": "unknown", 
"name": "(unknown)"} + {"count": 0, "id": "phone", "name": ""}, + {"count": 0, "id": "largephone", "name": "" }, + {"count": 0, "id": "tablet", "name": ""}, + {"count": 3, "id": "desktop", "name": ""}, + {"count": 0, "id": "desktophd", "name": ""}, + {"count": 1, "id": "unknown", "name": ""} ] }` if d := ztest.Diff(zjson.MustMarshalString(have), want, ztest.DiffJSON); d != "" { diff --git a/cron/tasks.go b/cron/tasks.go index c5f1963d9..b1462fa1b 100644 --- a/cron/tasks.go +++ b/cron/tasks.go @@ -217,6 +217,5 @@ func vacuumDeleted(ctx context.Context) error { func sessions(ctx context.Context) error { goatcounter.Memstore.EvictSessions() - goatcounter.Memstore.RefreshSalt() return nil } diff --git a/docs/sessions.md b/docs/sessions.md index 1618d58b2..e69de29bb 100644 --- a/docs/sessions.md +++ b/docs/sessions.md @@ -1,164 +0,0 @@ -Session tracking -================ - -*2022 preface*: this document mostly served as a design document for when I -wrote the session tracking code back in 2019. It's not intended as an overview -of "all possible techniques one could use in $current_year"; the examination of -existing solutions is likely incomplete, and sometimes probably outdated as -platforms evolve. - -In short: I don't keep it updated. - ---- - - -"Session tracking" allows more advanced tracking than just the "pageview" -counter we have now. A "session" is a single browsing session people have on a -website. - -Right now, every pageview shows up as-is in the dashboard, including things like -page refreshes. There is also no ability to determine things like conversion -rates and the like. - -Goals: - -- Avoid requiring GDPR consent notices. - -- The ability to view the number of "unique visitors" rather than just - "pageviews". - -- Basic "bounce rate" and "conversion rate"-like statistics; for example, if - someone enters on /foo we want to be able to see how many leave after visiting - just that page, or how many end up on /signup. 
- -Non-goals: - -- Track beyond a single browsing session. - - -Existing solutions ------------------- - -An overview of existing solutions that I'm aware or with roughly the same goals. - -Ackee ------ - -https://github.com/electerious/Ackee/blob/master/docs/Anonymization.md - -> Uses a one-way salted hash of the IP, User-Agent, and sites.ID. The hash changes -> daily and is never stored. -> -> This way a visitor can be tracked for one day at the most. - -This seems like a decent enough approach, and it doesn't require storing any -information in the browser with e.g. a cookie. - -It does generate a persistent device-unique identifier though, and I'm not sure -this is enough anonymisation in the context of the GDPR (although it may be? -It's hard to say anything conclusive about this at the moment) - -Fathom ------- - -https://usefathom.com/blog/anonymization - -> Unique siteviews are tracked by a hash which includes the site.ID; unique -> pageviews are tracked by as hash which includes the site.ID and path being -> tracked. -> -> To mark previous requests "finished" (not sure what that means) the current -> pageview's hash is removed and moved to the newest pageview. - -I'm not entirely sure if it's actually better or more "private" than Ackee's -simpler method. The Fathom docs mention that they "can’t put together an -anonymous, individual user’s browsing habits", but is seeing which path people -take on your website really tracking someone's "browsing habits", or can this -lead to identifying a "natural person"? - -Or, to give an analogy: I'm not sure if there's anything wrong with just seeing -where your customers go in your store. The problems start when you start -creating profiles of those people on recurring visits, or when you see where -they go to other stores, too. 
- - -SimpleAnalytics ---------------- - -https://docs.simpleanalytics.com/uniques - -> If the Referer header is another.site or missing it's counted as a unique -> visit; if it's mysite.com then it's counted as a recurring visit. - -A lot of browsers/people don't send a Referer header (somewhere between ~30% and -~50%); this number is probably higher since Referer is set more often for -requests in the same domain, but probably not 100%. - -This is a pretty simple method, but it doesn't allow showing bounce or -conversion rates or other slightly more advanced statistics. - - -Simple Web Analytics --------------------- - -https://simple-web-analytics.com/ - -Uses the browser cache to achieve season tracking: The endpoint being called by -the tracking code sets the `Expire` header to the next calendar day of the -user's timezone. - -This ensures the server only gets hit once per day per session; subsequent -requests are not tracked at all. - -In practice there are cases where a session is counted more than once. Firefox -for example ignores the HTTP cache when the user hits the reload button. - -It's a simple and un-complex approach, and doesn't require storing any -information about the user (hashed or otherwise) on the server. The downside is -that intermediate requests are not tracked at all, which would make it -unsuitable for GoatCounter. - - -GoatCounter's solution ----------------------- - -- Create a server-side hash: hash(site.ID, User-Agent, IP, salt) to identify - the client without storing any personal information directly. - -- Don't persist the hash to disk; this isn't really needed as we just want to - track the "browsing session" rather than re-identify someone coming back the - next day. - -- The salt is rotated every 4 hour on a sliding schedule; when a new pageview - comes in we try to find an existing session based on the current and previous - salt. This ensures there isn't some arbitrary cut-off time when the salt is - rotated. 
After 8 hours, the salt is permanently deleted. - -- If a user visits the next time, they will have the same hash, but the system - has forgotten about it by then. - -The whole hashing thing is *kind of* superfluous since the data is never stored -to disk with one exception: it's temporarily stored on shutdown, to be read and -deleted on startup. It doesn't hurt to hash the data though, and better safe -than sorry. - -I considered generating the ID on the client side as a session cookie or -localStorage, but this is tricky due to the ePrivacy directive, which requires -that *"users are provided with clear and precise information in accordance with -Directive 95/46/EC about the purposes of cookies"* and should be offered the -*"right to refuse"*, making exceptions only for data that is *"strictly -necessary in order to provide a [..] service explicitly requested by the -subscriber or user"*. - -Ironically, using a cookie would not only make things simpler but also *more* -privacy friendly, as there would be no salt stored on the server, and the user -has more control. It is what it is 🤷 - -I'm not super keen on adding the IP address in the hash, as IP addresses are -quite ephemeral; think about moving from WiFi to 4G for example, or ISPs who -recycle IP addresses a lot. There's no clear alternatives as far as I know -though, but it may be replaced with something else in the future. - -Fathom's solution with multiple hashes seems rather complex, without any clear -advantages; using just a single hash like this already won't store more -information than before, and the hash is stored temporarily. 
diff --git a/handlers/count_test.go b/handlers/count_test.go index 5b4e0a6ef..6be3d814f 100644 --- a/handlers/count_test.go +++ b/handlers/count_test.go @@ -270,30 +270,6 @@ func TestBackendCountSessions(t *testing.T) { } } - rotate := func(ctx context.Context) { - now = now.Add(12 * time.Hour) - oldCur, _ := goatcounter.Memstore.GetSalt() - - goatcounter.Memstore.RefreshSalt() - - _, prev := goatcounter.Memstore.GetSalt() - if string(prev) != string(oldCur) { - t.Fatalf("salts not cycled?\noldCur: %s\nprev: %s\n", string(oldCur), string(prev)) - } - } - - // Ensure salts aren't cycled before they should. - beforeCur, beforePrev := goatcounter.Memstore.GetSalt() - now = now.Add(1 * time.Hour) - goatcounter.Memstore.RefreshSalt() - afterCur, afterPrev := goatcounter.Memstore.GetSalt() - - before := string(beforeCur) + " → " + string(beforePrev) - after := string(afterCur) + " → " + string(afterPrev) - if before != after { - t.Fatalf("salts cycled too soon\nbefore: %s\nafter: %s", before, after) - } - send(ctx1, "test") send(ctx1, "test") send(ctx1, "other") @@ -308,8 +284,9 @@ func TestBackendCountSessions(t *testing.T) { want := []int{1, 1, 2, 3, 3, 1, 2} checkSess(append(hits1, hits2...), want) - // Rotate, should still use the same sessions. - rotate(ctx1) + // Should still use the same sessions. + goatcounter.SessionTime = 1 * time.Second + goatcounter.Memstore.EvictSessions() send(ctx1, "test") send(ctx2, "test") hits1 = checkHits(ctx1, 6) @@ -317,8 +294,9 @@ func TestBackendCountSessions(t *testing.T) { want = []int{1, 1, 2, 3, 3, 1, 2, 1, 3} checkSess(append(hits1, hits2...), want) - // Rotate again, should use new sessions from now on. - rotate(ctx1) + // Should use new sessions from now on. 
+ now = time.Date(2019, 6, 18, 14, 42, 2, 0, time.UTC) + goatcounter.Memstore.EvictSessions() send(ctx1, "test") send(ctx2, "test") hits1 = checkHits(ctx1, 7) diff --git a/handlers/http_test.go b/handlers/http_test.go index cf83f052f..2e1d6ea2a 100644 --- a/handlers/http_test.go +++ b/handlers/http_test.go @@ -70,7 +70,7 @@ func TestMain(m *testing.M) { // Don't need tests. "", "bosmang.gohtml", "bosmang_site.gohtml", "bosmang_cache.gohtml", "bosmang_bgrun.gohtml", "bosmang_metrics.gohtml", "bosmang_sites.gohtml", - "i18n_list.gohtml", "i18n_show.gohtml", + "i18n_list.gohtml", "i18n_show.gohtml", "i18n_manage.gohtml", // Tested in tpl_test.go "email_export_done.gotxt", "email_forgot_site.gotxt", diff --git a/hit_stats_test.go b/hit_stats_test.go index b4326c547..aece77004 100644 --- a/hit_stats_test.go +++ b/hit_stats_test.go @@ -135,32 +135,32 @@ func TestHitStats(t *testing.T) { "stats": [ { "id": "phone", - "name": "Phones", + "name": "", "count": 0 }, { "id": "largephone", - "name": "Large phones, small tablets", + "name": "", "count": 1 }, { "id": "tablet", - "name": "Tablets and small laptops", + "name": "", "count": 0 }, { "id": "desktop", - "name": "Computer monitors", + "name": "", "count": 1 }, { "id": "desktophd", - "name": "Computer monitors larger than HD", + "name": "", "count": 0 }, { "id": "unknown", - "name": "(unknown)", + "name": "", "count": 0 } ] @@ -288,32 +288,32 @@ func TestListSizes(t *testing.T) { "stats": [ { "id": "phone", - "name": "Phones", + "name": "", "count": 1 }, { "id": "largephone", - "name": "Large phones, small tablets", + "name": "", "count": 1 }, { "id": "tablet", - "name": "Tablets and small laptops", + "name": "", "count": 1 }, { "id": "desktop", - "name": "Computer monitors", + "name": "", "count": 1 }, { "id": "desktophd", - "name": "Computer monitors larger than HD", + "name": "", "count": 3 }, { "id": "unknown", - "name": "(unknown)", + "name": "", "count": 1 } ] diff --git a/memstore.go b/memstore.go index 
f36822449..dcbbcd634 100644 --- a/memstore.go +++ b/memstore.go @@ -6,14 +6,10 @@ package goatcounter import ( "context" - "crypto/sha256" - "encoding" - "encoding/base64" "errors" "fmt" "net/url" "slices" - "strconv" "strings" "sync" "time" @@ -22,7 +18,6 @@ import ( "zgo.at/zdb" "zgo.at/zlog" "zgo.at/zstd/zbool" - "zgo.at/zstd/zcrypto" "zgo.at/zstd/zint" "zgo.at/zstd/ztime" "zgo.at/zstd/ztype" @@ -35,40 +30,17 @@ var ( TestSeqSession = zint.Uint128{TestSession[0], TestSession[1] + 1} ) -// The json encoder doesn't like binary data, so base64 it; need struct as it'll -// ignore MarshalText on "type hash string" (but not UnmarshalText? Hmm) -type hash struct{ v string } - -var ( - _ encoding.TextMarshaler = hash{} - _ encoding.TextUnmarshaler = &hash{} -) - -// MarshalText converts the data to a human readable representation. -func (h hash) MarshalText() ([]byte, error) { - b := base64.StdEncoding.EncodeToString([]byte(h.v)) - return []byte(b), nil -} - -// UnmarshalText parses text in to the Go data structure. 
-func (h *hash) UnmarshalText(v []byte) error { - b, err := base64.StdEncoding.DecodeString(string(v)) - h.v = string(b) - return err -} +type sessionKey string type ms struct { hitMu sync.RWMutex hits []Hit sessionMu sync.RWMutex - sessions map[hash]zint.Uint128 // Hash → sessionID - sessionHashes map[zint.Uint128]hash // sessionID → hash + sessions map[sessionKey]zint.Uint128 // sessionKey → sessionID + sessionHashes map[zint.Uint128]sessionKey // sessionID → sessionKey sessionPaths map[zint.Uint128]map[int64]struct{} // SessionID → path_id sessionSeen map[zint.Uint128]int64 // SessionID → lastseen - curSalt []byte - prevSalt []byte - saltRotated time.Time testHook bool } @@ -76,26 +48,20 @@ type ms struct { var Memstore ms type storedSession struct { - Sessions map[hash]zint.Uint128 `json:"sessions"` - Hashes map[zint.Uint128]hash `json:"hashes"` - Paths map[zint.Uint128]map[int64]struct{} `json:"paths"` - Seen map[zint.Uint128]int64 `json:"seen"` - CurSalt []byte `json:"cur_salt"` - PrevSalt []byte `json:"prev_salt"` - SaltRotated time.Time `json:"salt_rotated"` + Sessions map[sessionKey]zint.Uint128 `json:"sessions"` + Hashes map[zint.Uint128]sessionKey `json:"hashes"` + Paths map[zint.Uint128]map[int64]struct{} `json:"paths"` + Seen map[zint.Uint128]int64 `json:"seen"` } func (m *ms) Reset() { m.sessionMu.Lock() defer m.sessionMu.Unlock() - m.sessions = make(map[hash]zint.Uint128) - m.sessionHashes = make(map[zint.Uint128]hash) + m.sessions = make(map[sessionKey]zint.Uint128) + m.sessionHashes = make(map[zint.Uint128]sessionKey) m.sessionPaths = make(map[zint.Uint128]map[int64]struct{}) m.sessionSeen = make(map[zint.Uint128]int64) - m.curSalt = []byte(zcrypto.Secret256()) - m.prevSalt = []byte(zcrypto.Secret256()) - m.saltRotated = ztime.Now() TestSeqSession = zint.Uint128{TestSession[0], TestSession[1] + 1} } @@ -149,16 +115,6 @@ func (m *ms) Init(db zdb.DB) error { if stored.Seen != nil { m.sessionSeen = stored.Seen } - if len(stored.CurSalt) > 0 { - 
m.curSalt = stored.CurSalt - } - if len(stored.PrevSalt) > 0 { - m.prevSalt = stored.PrevSalt - } - if !stored.SaltRotated.IsZero() { - m.saltRotated = stored.SaltRotated - } - return nil } @@ -167,13 +123,10 @@ func (m *ms) StoreSessions(db zdb.DB) { defer m.sessionMu.Unlock() d, err := json.Marshal(storedSession{ - Sessions: m.sessions, - Paths: m.sessionPaths, - Seen: m.sessionSeen, - Hashes: m.sessionHashes, - CurSalt: m.curSalt, - PrevSalt: m.prevSalt, - SaltRotated: m.saltRotated, + Sessions: m.sessions, + Paths: m.sessionPaths, + Seen: m.sessionSeen, + Hashes: m.sessionHashes, }) if err != nil { zlog.Error(err) @@ -353,23 +306,8 @@ func (m *ms) processHit(ctx context.Context, h *Hit) bool { return true } -func (m *ms) GetSalt() (cur []byte, prev []byte) { - m.sessionMu.Lock() - defer m.sessionMu.Unlock() - return m.curSalt, m.prevSalt -} - -func (m *ms) RefreshSalt() { - m.sessionMu.Lock() - defer m.sessionMu.Unlock() - - if m.saltRotated.Add(4 * time.Hour).After(ztime.Now()) { - return - } - - m.prevSalt = m.curSalt[:] - m.curSalt = []byte(zcrypto.Secret256()) -} +// Maximum length of sessions; exported here for tests. 
+var SessionTime = 8 * time.Hour // For 10k sessions this takes about 5ms on my laptop; that's a small enough // delay to not overly worry about (there are rarely more than a few hundred @@ -378,17 +316,24 @@ func (m *ms) EvictSessions() { m.sessionMu.Lock() defer m.sessionMu.Unlock() - ev := ztime.Now().Add(-4 * time.Hour).Unix() - for sID, seen := range m.sessionSeen { + ev := ztime.Now().Add(-SessionTime).Unix() + for id, seen := range m.sessionSeen { if seen > ev { continue } - hash := m.sessionHashes[sID] - delete(m.sessions, hash) - delete(m.sessionPaths, sID) - delete(m.sessionSeen, sID) - delete(m.sessionHashes, sID) + sk := m.sessionHashes[id] + + sessLog.Fields(zlog.F{ + "session-id": id, + "last-seen": seen, + "session-key": sk, + }).Debug("evicting session") + + delete(m.sessions, sk) + delete(m.sessionPaths, id) + delete(m.sessionSeen, id) + delete(m.sessionHashes, id) } } @@ -401,43 +346,45 @@ func (m *ms) SessionID() zint.Uint128 { return UUID() } -func (m *ms) session(ctx context.Context, siteID, pathID int64, userSessionID, ua, remoteAddr string) (zint.Uint128, zbool.Bool) { - sessionHash := hash{userSessionID} +var sessLog = zlog.Module("session") +func (m *ms) session(ctx context.Context, siteID, pathID int64, userSessionID, ua, remoteAddr string) (zint.Uint128, zbool.Bool) { + sk := sessionKey(userSessionID) if userSessionID == "" { - h := sha256.New() - h.Write(append(append(append(m.curSalt, ua...), remoteAddr...), strconv.FormatInt(siteID, 10)...)) - sessionHash = hash{string(h.Sum(nil))} + sk = sessionKey(fmt.Sprintf("%s-%s-%d", ua, remoteAddr, siteID)) } m.sessionMu.Lock() defer m.sessionMu.Unlock() - id, ok := m.sessions[sessionHash] - if !ok && userSessionID == "" { // Try previous hash - h := sha256.New() - h.Write(append(append(append(m.prevSalt, ua...), remoteAddr...), strconv.FormatInt(siteID, 10)...)) - prev := hash{string(h.Sum(nil))} - id, ok = m.sessions[prev] - if ok { - sessionHash = prev - } - } - + id, ok := m.sessions[sk] if 
ok { // Existing session m.sessionSeen[id] = ztime.Now().Unix() _, seenPath := m.sessionPaths[id][pathID] if !seenPath { m.sessionPaths[id][pathID] = struct{}{} } + + sessLog.Fields(zlog.F{ + "session-key": sk, + "session-id": id, + "path": pathID, + "seen-path": seenPath, + }).Debug("HIT") return id, zbool.Bool(!seenPath) } // New session id = m.SessionID() - m.sessions[sessionHash] = id + m.sessions[sk] = id m.sessionPaths[id] = map[int64]struct{}{pathID: struct{}{}} m.sessionSeen[id] = ztime.Now().Unix() - m.sessionHashes[id] = sessionHash + m.sessionHashes[id] = sk + + sessLog.Fields(zlog.F{ + "session-key": sk, + "session-id": id, + "path": pathID, + }).Debug("MISS: created new") return id, true } diff --git a/settings.go b/settings.go index 3a7c11a57..adc9e50e4 100644 --- a/settings.go +++ b/settings.go @@ -405,7 +405,7 @@ func (ss SiteSettings) CollectFlags(ctx context.Context) []CollectFlag { return []CollectFlag{ { Label: z18n.T(ctx, "data-collect/label/sessions|Sessions"), - Help: z18n.T(ctx, "data-collect/help/sessions|Track unique visitors for up to 8 hours; if you disable this then someone pressing e.g. F5 to reload the page will just show as 2 pageviews instead of 1"), + Help: z18n.T(ctx, "data-collect/help/sessions|%[Track unique visitors] for up to 8 hours; if you disable this then someone pressing e.g. 
F5 to reload the page will just show as 2 pageviews instead of 1.", z18n.Tag("a", `href="/help/sessions"`)), Flag: CollectSession, }, { @@ -435,7 +435,7 @@ func (ss SiteSettings) CollectFlags(ctx context.Context) []CollectFlag { }, { Label: z18n.T(ctx, "data-collect/label/language|Language"), - Help: z18n.T(ctx, "data-collect/help/language|Supported languages from Accept-Language"), + Help: z18n.T(ctx, "data-collect/help/language|Supported languages from Accept-Language."), Flag: CollectLanguage, }, } diff --git a/tpl.go b/tpl.go index 0c9fddcf3..b5be4a4f8 100644 --- a/tpl.go +++ b/tpl.go @@ -206,13 +206,14 @@ func init() { {href: "domains", label: "Track multiple domains/sites?"}, {href: "spa", label: "Add GoatCounter to a SPA?"}, {href: "campaigns", label: "Track campaigns?"}, - {href: "countjs-versions", label: "count.js versions and SRI"}, + {href: "countjs-versions", label: "Use SRI with count.js?"}, {href: "countjs-host", label: "Host count.js somewhere else?"}, {href: "frame", label: "Embed GoatCounter in a frame?"}}}, {label: "Other", items: []x{ // TODO: add "adblock" page // TODO: add "campiagns page"; link in "settings_main". {href: "export", label: "Export format"}, + {href: "sessions", label: "Sessions and visitors"}, {href: "api", label: "API"}, {href: "faq", label: "FAQ"}, {href: "translating", label: "Translating GoatCounter"}}}, diff --git a/tpl/help/sessions.md b/tpl/help/sessions.md new file mode 100644 index 000000000..ff9040145 --- /dev/null +++ b/tpl/help/sessions.md @@ -0,0 +1,61 @@ +GoatCounter only counts "visits" rather than "pageviews": + +- A "pageview" is every time a page is loaded. + +- A "visit" is the first time someone loads a page. Someone reloading the page + or going to another page and then coming back is counted as one visit. + +You almost always want to keep track of visits rather than pageviews. Otherwise +someone reloading the page ten times will be counted ten times, which is not +really meaningful. 
+ +This can be disabled in the site settings, at `Settings → Data collection → +Sessions`. If it's disabled, every pageview counts as a "visit". + +Technical details +----------------- +The way visitors are identified is as follows: + +1. A sessionHash is created as hash(siteID + User-Agent + IP). + +2. Store this in memory as a sessionHash→UUIDv4 map for 8 hours. + +3. Store a UUIDv4→seen_paths map (again in memory), so we can count new visits + for new paths. + +4. Use the UUIDv4 in the database and such. + +The IP address and User-Agent are never stored to the database or disk, and +there is no conceivable way to trace the random UUID back to this. + +It's only stored in memory, which is needed anyway for basic networking to work. + +---- + +Or in pseudo-code: + + session_key = site_id + user_agent + IP + count_as_visit = false + + # We've seen this session before. + if sessions[session_key] and sessions[session_key].newer_than(8_hours) + # Only count as visit if this session hasn't visited this path yet. + if not sessions[session_key].seen_path(current_path) + count_as_visit = true + add_current_path(sessions[session_key]) + end + else + # Generate new session. + sessions[session_key] = create_random_uuid() + add_current_path(sessions[session_key]) + count_as_visit = true + end + + # Store pageview; only the random UUID in sessions[session_key] is stored, + # and NOT session_key + store_pageview() + + # Increase counter to make the charts go up. + if count_as_visit + increase_counter_in_database() + end diff --git a/tpl/home.gohtml b/tpl/home.gohtml index c85df1d67..2ec7400ee 100644 --- a/tpl/home.gohtml +++ b/tpl/home.gohtml @@ -56,8 +56,7 @@

Identify unique visits without cookies or persistently storing any personal data - (technical details). -

+ (details).

Keeps useful statistics such as browser information, location, and screen size. Keep diff --git a/tpl/settings_main.gohtml b/tpl/settings_main.gohtml index aaf068f56..3fe4d5f1b 100644 --- a/tpl/settings_main.gohtml +++ b/tpl/settings_main.gohtml @@ -125,7 +125,7 @@ {{range $cf := .Site.Settings.CollectFlags .Context}}

{{$cf.Help}}
+
{{$cf.Help | unsafe}}
{{if eq $cf.Label "Region"}}