|
|
Archives of the TeradataForum
Message Posted: Mon, 08 Sep 2003 @ 18:06:22 GMT
Subj: | | Re: MEAN , MEDIAN and 90th percentile |
|
From: | | Dieter Noeth |
VEERENDER ROKKA wrote:
| Is there any function in teradata for calculating MEAN , MEDIAN and 90th percentile for a column in the table. | |
As Claybourne already explained, PERCENT_RANK can be used for percentile (it's better than QUANTILE, because PERCENT_RANK is
SQL:1999 and there's only a minor difference how it's calculated).
For median i'll append some cut'n'paste...
Dieter
CREATE MULTISET TABLE median_test
(
id INTEGER
,med_group INTEGER
,med_value INTEGER
) PRIMARY INDEX (id)
;
INSERT INTO median_test
SELECT
calendar_date
,RANDOM(1,2000)
,RANDOM(1,10000)
FROM sys_calendar.calendar
;
COLLECT STATISTICS ON median_test COLUMN med_group;
COLLECT STATISTICS ON median_test COLUMN med_value;
/*** "financial median" using COUNT and ROW_NUMBER ***/
SELECT
dt1.med_group
,avg(med_value)
FROM
(SELECT
med_group
,med_value
,ROW_NUMBER() OVER (PARTITION BY med_group
ORDER BY med_value) AS row_num
/* < V2R5
,SUM(1) OVER (PARTITION BY med_group
ORDER BY med_value
ROWS UNBOUNDED PRECEDING) AS row_num
*/
FROM median_test
) AS dt1
JOIN
(SELECT
med_group
,COUNT(*) AS row_count
FROM median_test
GROUP BY med_group
) AS dt2
ON dt1.med_group = dt2.med_group
WHERE
row_num = (row_count + 1) / 2
OR
row_num = (row_count / 2) + 1
GROUP BY dt1.med_group
ORDER BY dt1.med_group
;
/*** Usually more efficient:
"financial median" using Group COUNT and ROW_NUMBER ***/
SELECT
dt1.med_group
,AVG(med_value)
FROM
(SELECT
med_group
,med_value
,ROW_NUMBER() OVER (PARTITION BY med_group
ORDER BY med_value) AS row_num
/* < V2R5
,SUM(1) OVER (PARTITION BY med_group
ORDER BY med_value
ROWS UNBOUNDED PRECEDING) AS row_num
*/
,COUNT(*) OVER (PARTITION BY med_group
ROWS BETWEEN UNBOUNDED PRECEDING
AND UNBOUNDED FOLLOWING) AS row_count
FROM median_test
) AS dt1
WHERE
row_num = (row_count + 1) / 2
OR
row_num = (row_count / 2) + 1
GROUP BY med_group
ORDER BY med_group
;
/*** "statistical median" using Group COUNT + ROW_NUMBER ***/
SELECT
dt1.med_group
,med_value
FROM
(SELECT
med_group
,med_value
,ROW_NUMBER() OVER (PARTITION BY med_group
ORDER BY med_value) AS row_num
/* < V2R5
,SUM(1) OVER (PARTITION BY med_group
ORDER BY med_value
ROWS UNBOUNDED PRECEDING) AS row_num
*/
,COUNT(*) OVER (PARTITION BY med_group
ROWS BETWEEN UNBOUNDED PRECEDING
AND UNBOUNDED FOLLOWING) AS row_count
FROM median_test
) AS dt1
WHERE
row_num = (row_count + 1) / 2 --left (lesser) value
-- row_num = (row_count / 2) + 1 --right (greater) value
ORDER BY med_group
;
/*** don't use old sytle queries, it's so sloooooow.
This one is for "statistical median" ***/
SELECT
t1.med_group
,t1.med_value
FROM median_test t1
JOIN median_test t2
ON t1.med_group = t2.med_group
GROUP BY t1.med_group, t1.med_value
HAVING
SUM(CASE WHEN t2.med_value <= t1.med_value THEN 1 ELSE 0 END)
>= (COUNT(*) + 1) / 2
AND
SUM(CASE WHEN t2.med_value >= t1.med_value THEN 1 ELSE 0 END)
>= (COUNT(*) / 2 ) + 1
;
| |