Home Page for the TeradataForum
 

Archives of the TeradataForum

Message Posted: Mon, 08 Sep 2003 @ 18:06:22 GMT


     
  <Prev Next>   <<First <Prev
Next>
Last>>
 


Subj:   Re: MEAN , MEDIAN and 90th percentile
 
From:   Dieter Noeth

VEERENDER ROKKA wrote:

  Is there any function in teradata for calculating MEAN , MEDIAN and 90th percentile for a column in the table.  



As Claybourne already explained, PERCENT_RANK can be used for percentile (it's better than QUANTILE, because PERCENT_RANK is SQL:1999 and there's only a minor difference how it's calculated).

For median i'll append some cut'n'paste...


Dieter


CREATE MULTISET TABLE median_test
(
   id INTEGER
   ,med_group INTEGER
   ,med_value INTEGER
) PRIMARY INDEX (id)
;

INSERT INTO median_test
SELECT
   calendar_date
   ,RANDOM(1,2000)
   ,RANDOM(1,10000)
FROM sys_calendar.calendar
;

COLLECT STATISTICS ON median_test COLUMN med_group;
COLLECT STATISTICS ON median_test COLUMN med_value;

/*** "financial median" using COUNT and ROW_NUMBER ***/
SELECT
   dt1.med_group
   ,avg(med_value)
FROM
  (SELECT
     med_group
     ,med_value
     ,ROW_NUMBER() OVER (PARTITION BY med_group
                   ORDER BY med_value) AS row_num
/* < V2R5
     ,SUM(1) OVER (PARTITION BY med_group
                   ORDER BY med_value
                   ROWS UNBOUNDED PRECEDING) AS row_num
*/
   FROM median_test
  ) AS dt1
     JOIN
  (SELECT
     med_group
     ,COUNT(*) AS row_count
   FROM median_test
   GROUP BY med_group
  ) AS dt2
     ON dt1.med_group = dt2.med_group
WHERE
   row_num = (row_count + 1) / 2
OR
   row_num = (row_count / 2) + 1
GROUP BY dt1.med_group
ORDER BY dt1.med_group
;

/*** Usually more efficient:
      "financial median" using Group COUNT and ROW_NUMBER ***/
SELECT
   dt1.med_group
   ,AVG(med_value)
FROM
  (SELECT
     med_group
     ,med_value
     ,ROW_NUMBER() OVER (PARTITION BY med_group
                   ORDER BY med_value) AS row_num
/* < V2R5
     ,SUM(1) OVER (PARTITION BY med_group
                   ORDER BY med_value
                   ROWS UNBOUNDED PRECEDING) AS row_num
*/
     ,COUNT(*) OVER (PARTITION BY med_group
                     ROWS BETWEEN UNBOUNDED PRECEDING
                     AND UNBOUNDED FOLLOWING) AS row_count
   FROM median_test
) AS dt1
WHERE
   row_num = (row_count + 1) / 2
OR
   row_num = (row_count / 2) + 1
GROUP BY med_group
ORDER BY med_group
;

/*** "statistical median" using Group COUNT + ROW_NUMBER ***/
SELECT
   dt1.med_group
   ,med_value
FROM
  (SELECT
     med_group
     ,med_value
     ,ROW_NUMBER() OVER (PARTITION BY med_group
                   ORDER BY med_value) AS row_num
/* < V2R5
     ,SUM(1) OVER (PARTITION BY med_group
                   ORDER BY med_value
                   ROWS UNBOUNDED PRECEDING) AS row_num
*/
     ,COUNT(*) OVER (PARTITION BY med_group
                     ROWS BETWEEN UNBOUNDED PRECEDING
                     AND UNBOUNDED FOLLOWING) AS row_count
   FROM median_test
) AS dt1
WHERE
   row_num = (row_count + 1) / 2 --left (lesser) value
--  row_num = (row_count / 2) + 1 --right (greater) value
ORDER BY med_group
;

/*** don't use old sytle queries, it's so sloooooow.
      This one is for "statistical median" ***/
SELECT
   t1.med_group
   ,t1.med_value
FROM median_test t1
   JOIN median_test t2
     ON t1.med_group = t2.med_group
GROUP BY t1.med_group, t1.med_value
HAVING
   SUM(CASE WHEN t2.med_value <= t1.med_value THEN 1 ELSE 0 END)
     >= (COUNT(*) + 1) / 2
   AND
   SUM(CASE WHEN t2.med_value >= t1.med_value THEN 1 ELSE 0 END)
     >= (COUNT(*) / 2 ) + 1
;


     
  <Prev Next>   <<First <Prev
Next>
Last>>
 
 
 
 
 
 
 
 
 
  
  Top Home Privacy Feedback  
 
 
Copyright for the TeradataForum (TDATA-L), Manta BlueSky    
Copyright 2016 - All Rights Reserved    
Last Modified: 15 Jun 2023