Commit f5e9e9c8 authored by Htriedman's avatar Htriedman
Browse files

Merge branch 'isaac-update' into 'main'

more documentation, check for ns=0, set client cookies appropriately and only...

See merge request !1
parents aedb3c45 2b401103
......@@ -164,6 +164,11 @@ sub analytics_provenance_deliver_ {
}
}
/*****************************************************************************
* User Agent hints data
* See https://phabricator.wikimedia.org/T295073 for more details.
****************************************************************************/
/**
* If the request was made over a secure transport, then set the Accept-CH
* response header such that the following high-entropy client hints [0][1]
......@@ -204,79 +209,114 @@ sub analytics_client_hints_deliver_ {
}
}
/*****************************************************************************
* Differential Privacy Cookie
* See https://meta.wikimedia.org/wiki/Differential_privacy for more details.
****************************************************************************/
/**
* Receive and deliver client-side filtering for differential privacy — basically,
* we hash the page ID, truncate the hash to just three characters, check whether
* it is already in the list, and append it to the list if it is not.
* After the 10th pageview is seen, we exclude all following pageviews.
* This process resets at midnight UTC.
*
* Things I have no idea how to do in this section:
* - set global salt and have it reset each day
*/
/**
 * Request-side half of the differential-privacy filter.
 *
 * Produces up to three internal request headers for
 * analytics_differential_privacy_deliver_ to consume:
 *   - X-Include-PV: set to "0" when the client already carries include_pv=0
 *   - X-page-ID:    numeric page ID taken from X-Analytics (ns=0 only)
 *   - X-WMF-DP:     the client's current hash list from the WMF-DP cookie
 */
sub analytics_differential_privacy_recv_ {
    // Clear any copies sent by the user *before* branching, so that a
    // client-supplied value can never reach _deliver regardless of which
    // branch (if any) is taken below.
    unset req.http.X-Include-PV;
    unset req.http.X-page-ID;
    unset req.http.X-WMF-DP;

    // If the cookie has argument in the form "include_pv=0" (auto-exclude; no further processing needed)
    if (req.http.Cookie ~ "(^|;\s*)include_pv=0(;|$)") {
        // Set to exclude for use later in _deliver
        set req.http.X-Include-PV = "0";
    // Article pageview (x-analytics has namespace=0 and page ID) so we should process for DP
    // NOTE(review): _deliver reads resp.http.X-Analytics while this reads
    // req.http.X-Analytics -- confirm the request header is populated here.
    } else if (req.http.X-Analytics ~ "(^|;\s*)ns=0(;|$)" && req.http.X-Analytics ~ "(^|;\s*)page_id=(\d+).*(;|$)") {
        // Anchor the pattern to the whole header and capture only the digits,
        // so the result is the bare page ID and nothing else from X-Analytics.
        set req.http.X-page-ID = regsub(
            req.http.X-Analytics,
            "^(?:.*;\s*)?page_id=(\d+).*$",
            "\1"
        );
        // If the cookie has argument in the form "WMF-DP=21f,90a,...123," (between 1-9 hex hashes)
        if (req.http.Cookie ~ "(^|;\s*)WMF-DP=([a-f0-9]{3},){1,9}(;|$)") {
            // Save the value for use later in _deliver
            set req.http.X-WMF-DP = regsub(
                req.http.Cookie,
                "^(?:.*;\s*)?WMF-DP=([^;]+).*$",
                "\1"
            );
        }
    }
}
sub analytics_differential_privacy_deliver_ {
// If list doesn't exist, create it as an empty string
if (!req.http.X-WMF-DP) {
set req.http.X-WMF-DP = "";
}
/*****************************************************************************
 * !!! private to analytics_differential_privacy_deliver_ !!!!
 * Appends a Set-Cookie header storing the current hash list (X-WMF-DP) in the
 * WMF-DP cookie.
 *
 * The cookie always expires at the next midnight UTC: add one full day to
 * "now" and truncate (integer division) to a whole number of days. Adding
 * only 12 hours would yield today's 00:00 -- already in the past -- for any
 * request arriving before 12:00 UTC, making the browser drop the cookie.
 ****************************************************************************/
sub set_differential_privacy_cookie__ {
    header.append(resp.http.Set-Cookie,
        "WMF-DP="
        + req.http.X-WMF-DP
        + ";Path=/;HttpOnly;secure;Expires="
        + std.time(std.time2integer(now + 1d, 0) / 86400 * 86400, now)
    );
}
// Default to not including the page view
set req.http.X-Include-PV = 0;
/*****************************************************************************
 * !!! private to analytics_differential_privacy_deliver_ !!!!
 * Same as set_differential_privacy_cookie__ but empties the hash list and
 * excludes all following pageviews via an include_pv=0 cookie.
 *
 * A Set-Cookie header carries exactly one name=value pair; anything after the
 * first ';' is parsed as an attribute of that cookie. The two cookies are
 * therefore emitted as two separate Set-Cookie headers.
 *
 * Both expire at the next midnight UTC (now + 1 day, truncated to a whole
 * number of days).
 ****************************************************************************/
sub override_differential_privacy_cookie__ {
    // Overwrite the hash list with an empty value; an empty WMF-DP does not
    // match the 1-9 hash pattern checked in _recv.
    header.append(resp.http.Set-Cookie,
        "WMF-DP=;Path=/;HttpOnly;secure;Expires="
        + std.time(std.time2integer(now + 1d, 0) / 86400 * 86400, now)
    );
    // Mark the client as excluded for the rest of the UTC day.
    header.append(resp.http.Set-Cookie,
        "include_pv=0;Path=/;HttpOnly;secure;Expires="
        + std.time(std.time2integer(now + 1d, 0) / 86400 * 86400, now)
    );
}
// Create X-NowDay in "01-Jan-2000" form, from "now"
set req.http.X-NowDay = regsub(
now, "^..., (..) (...) (....) .*$", "\1-\2-\3"
);
/**
 * Response-side half of the differential-privacy filter.
 *
 * Reads the request headers prepared in analytics_differential_privacy_recv_
 * (X-Include-PV, X-page-ID, X-WMF-DP), decides whether this pageview is
 * included, updates the client's cookies through the two private helper subs,
 * and appends ";include_pv=<0|1>" to the X-Analytics response header.
 *
 * A pageview is included only when its hash is not yet in the client's list
 * and the list holds at most 9 entries (36 chars: 3-char hash + comma each).
 */
sub analytics_differential_privacy_deliver_ {
    // If known exclude from _recv, pass along but no other action required.
    // Headers are strings, so compare against "0" rather than the integer 0.
    if (req.http.X-Include-PV == "0") {
        set resp.http.X-Analytics = resp.http.X-Analytics + ";include_pv=0";
    } else if (req.http.X-page-ID) { // pageview to article based on _recv
        // If list doesn't exist, create it as an empty string
        if (!req.http.X-WMF-DP) {
            set req.http.X-WMF-DP = "";
        }

        // Default to not including the page view
        set req.http.X-Include-PV = "0";

        // If X-WMF-DP cookie has between 0 and 36 chars
        // Note: 36 chars because we're looking at 9 pages so far with 4 chars / page (3 char hex + comma)
        if (req.http.X-WMF-DP ~ "^.{0,36}$") {
            // Helper sub hashes the page ID; its result is read from X-Hash.
            // NOTE(review): dp_hash_vcl is defined elsewhere -- confirm it
            // writes req.http.X-Hash.
            call dp_hash_vcl;

            // VCL has no local variables and regex patterns must be literals,
            // so membership is tested by joining "<hash>:<list>" in a scratch
            // header and using a back-reference to look for "<hash>," as a
            // whole list element.
            set req.http.X-WMF-DP-Probe = req.http.X-Hash + ":" + req.http.X-WMF-DP;

            // If the hash is already in the list, X-Include-PV stays 0 and the cookie doesn't change.
            // If the hash is not in the list, add it, set X-Include-PV to 1, and update cookies.
            if (req.http.X-WMF-DP-Probe !~ "^([^:,]+):(?:[^,]*,)*\1,") {
                set req.http.X-WMF-DP = req.http.X-WMF-DP + req.http.X-Hash + ",";
                set req.http.X-Include-PV = "1";
                // If this was the 10th distinct page (list now 40 chars), this
                // pageview is still included, but the list is cleared and all
                // following pageviews are excluded.
                if (req.http.X-WMF-DP ~ "^.{40}$") {
                    call override_differential_privacy_cookie__;
                } else { // update WMF-DP cookie with new page ID
                    call set_differential_privacy_cookie__;
                }
            }
            unset req.http.X-WMF-DP-Probe; // scratch value only
        }

        // Set X-Analytics to include X-Include-PV
        set resp.http.X-Analytics = resp.http.X-Analytics
            + ";include_pv="
            + req.http.X-Include-PV;
    }
}
/*****************************************************************************
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment