|
|
Archives of the TeradataForum
Message Posted: Thu, 23 Jun 2005 @ 18:27:15 GMT
Subj: | | Re: String parsing |
|
From: | | Dieter Noeth |
Boris Mogilevsky wrote:
| I have a pipe delimeted String prefixString = "BR|BRCL|BRSD" | |
| I need string utility that can parse this into BR | |
> BRCL
> BRSD
| and can loop through these in an Insert Statement.? | |
In V2R6 this would be a simple task using a Table Function, maybe someone already wrote it?
Pre-V2R6 it's plain SQL, looks quite complicated, but is fast even on large datasets...
Dieter
DROP TABLE Strings;
CREATE TABLE Strings
(
groupcol INT NOT NULL,
string VARCHAR(247) NOT NULL
);
/*** Some comma delimited strings ***/
INSERT INTO Strings VALUES (1,'a,b,c,d,e');
INSERT INTO Strings VALUES (2,'a,bb,ccc,dddd,eeeee');
/*** Always be prepared for exceptional values... ***/
INSERT INTO Strings VALUES (3,'a,,bb');
INSERT INTO Strings VALUES (4,',a');
INSERT INTO Strings VALUES (5,'a,');
INSERT INTO Strings VALUES (6,',');
INSERT INTO Strings VALUES (7,'');
collect statistics on strings column groupcol;
/***
A helper table "Nums" with integer values from 1 (0) to the maximum
stringlength is needed.
1. Cross join Strings to Nums
2. Find the ','
1 a
2 , <--
3 b
4 b
5 , <--
6 c
7 c
8 c
9 , <--
10 d
11 d
12 d
13 d
14 , <--
15 e
16 e
17 e
17 e
18 e
3. Calculate the value
***/
/*** Joe Celko's query using 2 Cross-Joins is quite inefficient on
Teradata ****/
SELECT
groupcol
,SUBSTRING (',' || I1.string || ','
FROM MAX(S1.n + 1)
FOR (S2.n - MAX(S1.n + 1))
) AS word
,COUNT(S2.n) AS pos
FROM Strings AS I1, nums AS S1, nums AS S2
WHERE
SUBSTRING (',' || I1.string || ',' FROM S1.n FOR 1) = ','
AND SUBSTRING (',' || I1.string || ',' FROM S2.n FOR 1) = ','
AND s1.n between 1 and 247 and s2.n between 1 and 247
AND S1.n < S2.n
AND S2.n <= CHAR_LENGTH(I1.string) + 1
GROUP BY I1.groupcol, I1.string, S2.n
;
/*** Adding trailing ',' to the string + POSITION ***/
SELECT
groupcol
,SUBSTRING(string
FROM n
FOR (POSITION(',' IN SUBSTRING(string || ',' FROM n))) - 1
) AS word
,n
FROM Strings s, (SELECT n FROM nums WHERE n BETWEEN 1 AND 247 + 1) tmp
WHERE
(
n BETWEEN 2 AND CHAR_LENGTH(string) + 1
AND
SUBSTRING(string FROM n - 1 FOR 1) = ','
)
OR
n = 1
;
/*** Without modifying the string: CASE POSITION ***/
SELECT
groupcol
,SUBSTRING(string
FROM n
FOR (CASE
WHEN (POSITION(',' IN SUBSTRING(string FROM n))) = 0
THEN 64000
ELSE (POSITION(',' IN SUBSTRING(string FROM n)))
END) - 1
) AS word
,n
FROM Strings s, (SELECT n FROM nums WHERE n BETWEEN 1 AND 247 + 1) tmp
WHERE
(
n BETWEEN 2 AND CHAR_LENGTH(string) + 1
AND
SUBSTRING(string FROM n - 1 FOR 1) = ','
)
OR
n = 1
;
/*** Without modifying the string: OLAP ***/
SELECT
groupcol
,SUBSTRING(string FROM n
FOR (COALESCE(MAX(n) OVER (PARTITION BY groupcol
ORDER BY n ROWS BETWEEN 1 FOLLOWING and 1 FOLLOWING)
, 64000)) - n - 1
) AS word
,n
FROM Strings s, (SELECT n FROM nums WHERE n BETWEEN 1 AND 247 + 1) tmp
WHERE
(
n BETWEEN 2 AND CHAR_LENGTH(string) + 1
AND
SUBSTRING(string FROM n - 1 FOR 1) = ','
)
OR
n = 1
;
/*** Advantage OLAP version:
,ROW_NUMBER() OVER (PARTITION BY groupcol ORDER BY n) AS AS WordNumber
instead of the position of the start character "n".
Where clause may be enhanced for several delimiter chars...
a string of delimiter chars:
...
WHERE
(
n BETWEEN 2 AND CHAR_LENGTH(string) + 1
AND POSITION(SUBSTRING(string FROM n - 1 FOR 1) IN '!,;*') > 0
)
OR
n = 1
a list of delimiter chars:
...
WHERE
(
n BETWEEN 2 AND CHAR_LENGTH(string) + 1
AND SUBSTRING(string FROM n - 1 FOR 1) IN ('!',',',';','*'))
OR
n = 1
a table with delimiter chars:
-- most expensive version: another cross join to splitchars
create table splitchars(c char not null primary key);
ins splitchars('!');
ins splitchars(',');
ins splitchars(';');
ins splitchars('*');
...
FROM Strings s, (SELECT n FROM nums WHERE n BETWEEN 1 AND xxx + 1) tmp,
(select c, row_number() over (order by c) AS Char# FROM splitchars) x
WHERE
(
n BETWEEN 2 AND CHAR_LENGTH(string) + 1
AND SUBSTRING(string FROM n - 1 FOR 1) = c)
OR
(n = 1 AND Char# = 1)
***/
| |