From 0333bec98682f8e426d27782995baedc0201c6b9 Mon Sep 17 00:00:00 2001 From: Francis Cao Date: Mon, 3 Jun 2024 23:40:38 -0700 Subject: [PATCH] finish clickhouse journeys query --- .../(main)/reports/[reportId]/ReportPage.tsx | 8 +- .../reports/journey/JourneyParameters.tsx | 24 ++- .../(main)/reports/journey/JourneyReport.tsx | 2 +- .../(main)/reports/journey/JourneyView.tsx | 5 +- src/components/messages.ts | 2 + src/pages/api/reports/index.ts | 2 +- src/pages/api/reports/journey.ts | 12 ++ src/queries/analytics/reports/getJourney.ts | 138 ++++++++++++++---- 8 files changed, 151 insertions(+), 42 deletions(-) diff --git a/src/app/(main)/reports/[reportId]/ReportPage.tsx b/src/app/(main)/reports/[reportId]/ReportPage.tsx index f28942b7..0965b20d 100644 --- a/src/app/(main)/reports/[reportId]/ReportPage.tsx +++ b/src/app/(main)/reports/[reportId]/ReportPage.tsx @@ -1,11 +1,12 @@ 'use client'; -import FunnelReport from '../funnel/FunnelReport'; +import { useReport } from 'components/hooks'; import EventDataReport from '../event-data/EventDataReport'; +import FunnelReport from '../funnel/FunnelReport'; +import GoalReport from '../goals/GoalsReport'; import InsightsReport from '../insights/InsightsReport'; +import JourneyReport from '../journey/JourneyReport'; import RetentionReport from '../retention/RetentionReport'; import UTMReport from '../utm/UTMReport'; -import GoalReport from '../goals/GoalsReport'; -import { useReport } from 'components/hooks'; const reports = { funnel: FunnelReport, @@ -14,6 +15,7 @@ const reports = { retention: RetentionReport, utm: UTMReport, goals: GoalReport, + journey: JourneyReport, }; export default function ReportPage({ reportId }: { reportId: string }) { diff --git a/src/app/(main)/reports/journey/JourneyParameters.tsx b/src/app/(main)/reports/journey/JourneyParameters.tsx index b0544168..f140da13 100644 --- a/src/app/(main)/reports/journey/JourneyParameters.tsx +++ b/src/app/(main)/reports/journey/JourneyParameters.tsx @@ -1,6 +1,6 @@ import { useContext } from 'react'; import { useMessages } from 'components/hooks'; -import { Form, FormButtons, SubmitButton } from 'react-basics'; +import { Form, FormButtons, FormInput, FormRow, SubmitButton, TextField } from 'react-basics'; import { ReportContext } from '../[reportId]/Report'; import BaseParameters from '../[reportId]/BaseParameters'; @@ -9,8 +9,8 @@ export function JourneyParameters() { const { formatMessage, labels } = useMessages(); const { id, parameters } = report || {}; - const { websiteId, dateRange } = parameters || {}; - const queryDisabled = !websiteId || !dateRange; + const { websiteId, dateRange, steps } = parameters || {}; + const queryDisabled = !websiteId || !dateRange || !steps; const handleSubmit = (data: any, e: any) => { e.stopPropagation(); @@ -24,6 +24,24 @@ export function JourneyParameters() { return (
+ + + + + + + + + + + + + + + {formatMessage(labels.runQuery)} diff --git a/src/app/(main)/reports/journey/JourneyReport.tsx b/src/app/(main)/reports/journey/JourneyReport.tsx index 7b8927b4..9048b3d1 100644 --- a/src/app/(main)/reports/journey/JourneyReport.tsx +++ b/src/app/(main)/reports/journey/JourneyReport.tsx @@ -10,7 +10,7 @@ import { REPORT_TYPES } from 'lib/constants'; const defaultParameters = { type: REPORT_TYPES.journey, - parameters: {}, + parameters: { steps: 5 }, }; export default function JourneyReport({ reportId }: { reportId?: string }) { diff --git a/src/app/(main)/reports/journey/JourneyView.tsx b/src/app/(main)/reports/journey/JourneyView.tsx index ff1941dc..921d82dd 100644 --- a/src/app/(main)/reports/journey/JourneyView.tsx +++ b/src/app/(main)/reports/journey/JourneyView.tsx @@ -8,14 +8,13 @@ import { useEscapeKey } from 'components/hooks'; export default function JourneyView() { const [selected, setSelected] = useState(null); const { report } = useContext(ReportContext); - const { data } = report || {}; + const { data, parameters } = report || {}; useEscapeKey(() => setSelected(null)); - const columns = useMemo(() => { if (!data) { return []; } - return Array(data[0].items.length) + return Array(Number(parameters.steps)) .fill(undefined) .map((col = {}, index) => { data.forEach(({ items, count }) => { diff --git a/src/components/messages.ts b/src/components/messages.ts index 53e69401..a4c43af2 100644 --- a/src/components/messages.ts +++ b/src/components/messages.ts @@ -240,6 +240,8 @@ export const labels = defineMessages({ defaultMessage: 'Track your campaigns through UTM parameters.', }, steps: { id: 'label.steps', defaultMessage: 'Steps' }, + startStep: { id: 'label.start-step', defaultMessage: 'Start Step' }, + endStep: { id: 'label.end-step', defaultMessage: 'End Step' }, addStep: { id: 'label.add-step', defaultMessage: 'Add step' }, goal: { id: 'label.goal', defaultMessage: 'Goal' }, goals: { id: 'label.goals', defaultMessage: 'Goals' }, diff --git a/src/pages/api/reports/index.ts b/src/pages/api/reports/index.ts index 0698188c..63e9c1d5 100644 --- a/src/pages/api/reports/index.ts +++ b/src/pages/api/reports/index.ts @@ -27,7 +27,7 @@ const schema = { name: yup.string().max(200).required(), type: yup .string() - .matches(/funnel|insights|retention|utm|goals/i) + .matches(/funnel|insights|retention|utm|goals|journey/i) .required(), description: yup.string().max(500), parameters: yup diff --git a/src/pages/api/reports/journey.ts b/src/pages/api/reports/journey.ts index 84246f05..dd3bd57b 100644 --- a/src/pages/api/reports/journey.ts +++ b/src/pages/api/reports/journey.ts @@ -9,6 +9,9 @@ import * as yup from 'yup'; export interface RetentionRequestBody { websiteId: string; dateRange: { startDate: string; endDate: string }; + steps: number; + startStep?: string; + endStep?: string; } const schema = { @@ -21,6 +24,9 @@ const schema = { endDate: yup.date().required(), }) .required(), + steps: yup.number().min(3).max(7).required(), + startStep: yup.string(), + endStep: yup.string(), }), }; @@ -36,6 +42,9 @@ export default async ( const { websiteId, dateRange: { startDate, endDate }, + steps, + startStep, + endStep, } = req.body; if (!(await canViewWebsite(req.auth, websiteId))) { @@ -45,6 +54,9 @@ export default async ( const data = await getJourney(websiteId, { startDate: new Date(startDate), endDate: new Date(endDate), + steps, + startStep, + endStep, }); return ok(res, data); diff --git a/src/queries/analytics/reports/getJourney.ts b/src/queries/analytics/reports/getJourney.ts index a02bf9bf..e7696f8d 100644 --- a/src/queries/analytics/reports/getJourney.ts +++ b/src/queries/analytics/reports/getJourney.ts @@ -8,6 +8,8 @@ interface JourneyResult { e3: string; e4: string; e5: string; + e6: string; + e7: string; count: string; } @@ -17,6 +19,9 @@ export async function getJourney( filters: { startDate: Date; endDate: Date; + steps: number; + startStep?: string; + endStep?: string; }, ] ) { @@ -49,14 +54,14 @@ async function relationalQuery( and created_at between {{startDate}} and {{endDate}} and referrer_path != url_path), sequences as ( - SELECT s.e1, + select s.e1, s.e2, s.e3, s.e4, s.e5, count(*) count FROM ( - SELECT session_id, + select session_id, MAX(CASE WHEN event_number = 1 THEN event ELSE NULL END) AS e1, MAX(CASE WHEN event_number = 2 THEN event ELSE NULL END) AS e2, MAX(CASE WHEN event_number = 3 THEN event ELSE NULL END) AS e3, @@ -87,46 +92,99 @@ async function clickhouseQuery( filters: { startDate: Date; endDate: Date; + steps: number; + startStep?: string; + endStep?: string; }, ): Promise { - const { startDate, endDate } = filters; + const { startDate, endDate, steps, startStep, endStep } = filters; const { rawQuery } = clickhouse; + const { sequenceQuery, startStepQuery, endStepQuery, params } = getJourneyQuery( + steps, + startStep, + endStep, + ); + + function getJourneyQuery( + steps: number, + startStep?: string, + endStep?: string, + ): { + sequenceQuery: string; + startStepQuery: string; + endStepQuery: string; + params: { [key: string]: string }; + } { + const params = {}; + let sequenceQuery = ''; + let startStepQuery = ''; + let endStepQuery = ''; + + // create sequence query + let selectQuery = ''; + let maxQuery = ''; + let groupByQuery = ''; + + for (let i = 1; i <= steps; i++) { + const endQuery = i < steps ? ',' : ''; + selectQuery += `s.e${i},`; + maxQuery += `\nmax(CASE WHEN event_number = ${i} THEN event ELSE NULL END) AS e${i}${endQuery}`; + groupByQuery += `s.e${i}${endQuery} `; + } + + sequenceQuery = `\nsequences as ( + select ${selectQuery} + count(*) count + FROM ( + select visit_id, + ${maxQuery} + FROM events + group by visit_id) s + group by ${groupByQuery}) + `; + + // create start Step params query + if (startStep) { + startStepQuery = `and e1 = {startStep:String}`; + params['startStep'] = startStep; + } + + // create end Step params query + if (endStep) { + for (let i = 1; i < steps; i++) { + const startQuery = i === 1 ? 'and (' : '\nor '; + endStepQuery += `${startQuery}(e${i} = {endStep:String} and e${i + 1} is null) `; + } + endStepQuery += `\nor (e${steps} = {endStep:String}))`; + + params['endStep'] = endStep; + } + + return { + sequenceQuery, + startStepQuery, + endStepQuery, + params, + }; + } return rawQuery( ` WITH events AS ( select distinct - session_id, + visit_id, referrer_path, coalesce(nullIf(event_name, ''), url_path) event, - row_number() OVER (PARTITION BY session_id ORDER BY created_at) AS event_number - from umami.website_event + row_number() OVER (PARTITION BY visit_id ORDER BY created_at) AS event_number + from umami.website_event where website_id = {websiteId:UUID} - and created_at between {startDate:DateTime64} and {endDate:DateTime64} - and referrer_path != url_path), - sequences as ( - SELECT s.e1, - s.e2, - s.e3, - s.e4, - s.e5, - count(*) count - FROM ( - SELECT session_id, - max(CASE WHEN event_number = 1 THEN event ELSE NULL END) AS e1, - max(CASE WHEN event_number = 2 THEN event ELSE NULL END) AS e2, - max(CASE WHEN event_number = 3 THEN event ELSE NULL END) AS e3, - max(CASE WHEN event_number = 4 THEN event ELSE NULL END) AS e4, - max(CASE WHEN event_number = 5 THEN event ELSE NULL END) AS e5 - FROM events - group by session_id) s - group by s.e1, - s.e2, - s.e3, - s.e4, - s.e5) + and created_at between {startDate:DateTime64} and {endDate:DateTime64}), + ${sequenceQuery} select * from sequences + where 1 = 1 + ${startStepQuery} + ${endStepQuery} order by count desc limit 100 `, @@ -134,10 +192,28 @@ async function clickhouseQuery( websiteId, startDate, endDate, + ...params, }, ).then(parseResult); } -function parseResult(data: any) { - return data.map(({ e1, e2, e3, e4, e5, count }) => ({ items: [e1, e2, e3, e4, e5], count })); +function combineSequentialDuplicates(array: any) { + if (array.length === 0) return array; + + const result = [array[0]]; + + for (let i = 1; i < array.length; i++) { + if (array[i] !== array[i - 1]) { + result.push(array[i]); + } + } + + return result; +} + +function parseResult(data: any) { + return data.map(({ e1, e2, e3, e4, e5, e6, e7, count }) => ({ + items: combineSequentialDuplicates([e1, e2, e3, e4, e5, e6, e7]), + count, + })); }