-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathagregated_query.sql
More file actions
130 lines (121 loc) · 4.89 KB
/
agregated_query.sql
File metadata and controls
130 lines (121 loc) · 4.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
CREATE OR REPLACE FUNCTION get_sensor_aggregated_measurements(
p_sensor_id INTEGER,
p_interval TEXT DEFAULT 'hour',
p_interval_value INTEGER DEFAULT 1,
p_start_date TIMESTAMP DEFAULT NULL,
p_end_date TIMESTAMP DEFAULT NULL,
p_min_value FLOAT DEFAULT NULL,
p_max_value FLOAT DEFAULT NULL
)
RETURNS TABLE (
measurement_time TIMESTAMP,
value FLOAT,
median_value FLOAT,
point_count BIGINT, -- Changed from INTEGER to BIGINT
lower_bound FLOAT,
upper_bound FLOAT,
parametric_lower_bound FLOAT,
parametric_upper_bound FLOAT,
std_dev FLOAT,
min_value FLOAT,
max_value FLOAT,
percentile_25 FLOAT,
percentile_75 FLOAT,
ci_method TEXT,
confidence_level NUMERIC
) AS $$
DECLARE
interval_sql TEXT;
BEGIN
-- Handle custom interval sizes (e.g., 15 minutes)
IF p_interval = 'minute' AND p_interval_value > 1 THEN
interval_sql := format('date_trunc(''hour'', collectiontime) +
INTERVAL ''%s min'' * (EXTRACT(MINUTE FROM collectiontime)::INTEGER / %s)',
p_interval_value, p_interval_value);
ELSIF p_interval = 'hour' AND p_interval_value > 1 THEN
interval_sql := format('date_trunc(''day'', collectiontime) +
INTERVAL ''%s hour'' * (EXTRACT(HOUR FROM collectiontime)::INTEGER / %s)',
p_interval_value, p_interval_value);
ELSE
interval_sql := format('date_trunc(%L, collectiontime)', p_interval);
END IF;
-- Using the alternative approach without arrays to avoid potential issues
RETURN QUERY EXECUTE format(
'WITH aggregated_stats AS (
SELECT
%s AS interval_start,
AVG(measurementvalue) AS avg_value,
COUNT(*) AS point_count,
STDDEV(measurementvalue) AS std_dev,
MIN(measurementvalue) AS min_value,
MAX(measurementvalue) AS max_value
FROM
measurements
WHERE
sensorid = $1
AND ($2::TIMESTAMP IS NULL OR collectiontime >= $2)
AND ($3::TIMESTAMP IS NULL OR collectiontime <= $3)
AND ($4::FLOAT IS NULL OR measurementvalue >= $4)
AND ($5::FLOAT IS NULL OR measurementvalue <= $5)
GROUP BY
interval_start
ORDER BY
interval_start
),
percentile_stats AS (
SELECT
%s AS interval_start,
percentile_cont(0.025) WITHIN GROUP (ORDER BY measurementvalue) AS percentile_2_5,
percentile_cont(0.25) WITHIN GROUP (ORDER BY measurementvalue) AS percentile_25,
percentile_cont(0.5) WITHIN GROUP (ORDER BY measurementvalue) AS median_value,
percentile_cont(0.75) WITHIN GROUP (ORDER BY measurementvalue) AS percentile_75,
percentile_cont(0.975) WITHIN GROUP (ORDER BY measurementvalue) AS percentile_97_5
FROM
measurements
WHERE
sensorid = $1
AND ($2::TIMESTAMP IS NULL OR collectiontime >= $2)
AND ($3::TIMESTAMP IS NULL OR collectiontime <= $3)
AND ($4::FLOAT IS NULL OR measurementvalue >= $4)
AND ($5::FLOAT IS NULL OR measurementvalue <= $5)
GROUP BY
interval_start
)
SELECT
a.interval_start AS measurement_time,
a.avg_value AS value,
p.median_value,
a.point_count,
p.percentile_2_5 AS lower_bound,
p.percentile_97_5 AS upper_bound,
a.avg_value - (CASE
WHEN a.point_count >= 30 THEN 1.96
WHEN a.point_count >= 20 THEN 2.09
WHEN a.point_count >= 10 THEN 2.23
ELSE 2.58
END) * (a.std_dev / SQRT(GREATEST(a.point_count, 1))) AS parametric_lower_bound,
a.avg_value + (CASE
WHEN a.point_count >= 30 THEN 1.96
WHEN a.point_count >= 20 THEN 2.09
WHEN a.point_count >= 10 THEN 2.23
ELSE 2.58
END) * (a.std_dev / SQRT(GREATEST(a.point_count, 1))) AS parametric_upper_bound,
a.std_dev,
a.min_value,
a.max_value,
p.percentile_25,
p.percentile_75,
''percentile'' AS ci_method,
0.95 AS confidence_level
FROM
aggregated_stats a
JOIN
percentile_stats p ON a.interval_start = p.interval_start
WHERE
a.point_count > 1
ORDER BY
measurement_time',
interval_sql, interval_sql
) USING p_sensor_id, p_start_date, p_end_date, p_min_value, p_max_value;
END;
$$ LANGUAGE plpgsql;