Here's a function that splits given string according to predefined patterns. It's part of a trigger function used to populate one table from another. I want to be sure that I'm not making any mistakes.
CREATE TRIGGER insert_t2
AFTER INSERT ON t1
FOR EACH ROW
EXECUTE PROCEDURE insert_t2();
CREATE OR REPLACE FUNCTION insert_t2()
RETURNS TRIGGER AS $$
DECLARE
t2_name_list TEXT [];
t2_name TEXT;
BEGIN
t2_name_list := split_t1_name(NEW.name);
FOREACH t2_name IN ARRAY t2_name_list LOOP
INSERT INTO t2 (t1_id, name)
VALUES (NEW.id, t2_name);
END LOOP;
RETURN NULL;
END
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION split_t1_name(t1_name TEXT)
RETURNS TEXT [] AS $$
DECLARE
NAMES_DELIM CONSTANT TEXT := ' + ';
SUBNAMES_DELIM CONSTANT TEXT := '+';
fullname_list TEXT [];
fullname TEXT;
t2_name_list TEXT [];
BEGIN
IF t1_name IS NULL OR t1_name = '' THEN
RETURN ARRAY [NULL];
END IF;
IF position(NAMES_DELIM IN t1_name) = 0 AND
position(SUBNAMES_DELIM IN t1_name) = 0 THEN
RETURN ARRAY [t1_name];
END IF;
IF position(NAMES_DELIM IN t1_name) = 0 THEN
RETURN split_fullname(t1_name, SUBNAMES_DELIM);
END IF;
fullname_list := string_to_array(t1_name, NAMES_DELIM);
FOREACH fullname IN ARRAY fullname_list LOOP
IF position(SUBNAMES_DELIM IN fullname) <> 0 THEN
t2_name_list := t2_name_list ||
split_fullname(fullname, SUBNAMES_DELIM);
ELSE
t2_name_list := array_append(t2_name_list, fullname);
END IF;
END LOOP;
RETURN t2_name_list;
END
$$ LANGUAGE plpgsql IMMUTABLE;
CREATE OR REPLACE FUNCTION split_fullname(fullname TEXT, SUBNAMES_DELIM TEXT)
RETURNS TEXT [] AS $$
DECLARE
BSDELIM CONSTANT TEXT := ' ';
last_bsdelim_pos INTEGER;
basename TEXT;
subnames TEXT;
subname TEXT;
subname_list TEXT [];
t2_name_list TEXT [];
BEGIN
IF position(BSDELIM IN fullname) = 0 THEN
RETURN string_to_array(fullname, SUBNAMES_DELIM);
END IF;
last_bsdelim_pos := 1 + length(fullname) -
position(BSDELIM IN reverse(fullname));
basename := substring(fullname FROM 0 FOR last_bsdelim_pos);
subnames := substring(fullname FROM last_bsdelim_pos + 1);
subname_list := string_to_array(subnames, SUBNAMES_DELIM);
FOREACH subname IN ARRAY subname_list LOOP
t2_name_list := array_append(t2_name_list,
basename || BSDELIM || subname);
END LOOP;
RETURN t2_name_list;
END
$$ LANGUAGE plpgsql IMMUTABLE;
Test examples:
SELECT split_t1_name(NULL) = ARRAY [NULL];
SELECT split_t1_name('') = ARRAY [NULL];
SELECT split_t1_name('12') = ARRAY ['12'];
SELECT split_t1_name('21 A') = ARRAY ['21 A'];
SELECT split_t1_name('11+TMH') = ARRAY ['11', 'TMH'];
SELECT split_t1_name('11+TMH') = ARRAY ['11', 'TMH'];
SELECT split_t1_name('15 A + 18 B') = ARRAY ['15 A', '18 B'];
SELECT split_t1_name('VN TD a+b') = ARRAY ['VN TD a', 'VN TD b'];
SELECT split_t1_name('VN FF a+b + 14 B') = ARRAY ['VN FF a', 'VN FF b', '14 B'];
SELECT split_t1_name('VC TF a+b + VN TD a+b') = ARRAY ['VC TF a', 'VC TF b', 'VN TD a', 'VN TD b'];
- How easy is it to understand what's going on here?
- Is it better to use plperl or plpython for these kind of functions?
- Would it make code cleaner/easier to read?
I did test this with the plpython implementation and difference in performance wasn't so big:
EXPLAIN ANALYZE SELECT (split_t1_name_python(name)) FROM t1; Seq Scan on t1 (cost=0.00..7012.92 rows=25492 width=12) (actual time=0.299..649.970 rows=25492 loops=1) Total runtime: 1047.019 ms EXPLAIN ANALYZE SELECT (split_t1_name(name)) FROM t1; Seq Scan on t1 (cost=0.00..7012.92 rows=25492 width=12) (actual time=1.011..542.729 rows=25492 loops=1) Total runtime: 909.536 ms
split_t1_name(NULL)
to returnNULL
, andsplit_t1_name('')
to returnARRAY[]::TEXT[]
(an empty array ofTEXT
). – 200_success♦ Jan 22 at 4:53COMMENT ON
command or by creating multiline comment inside it's body? – nii Jan 22 at 8:11NULL
does make sense in that context. – 200_success♦ Jan 22 at 8:28