Take the 2-minute tour ×
Code Review Stack Exchange is a question and answer site for peer programmer code reviews. It's 100% free, no registration required.

Here's a function that splits given string according to predefined patterns. It's part of a trigger function used to populate one table from another. I want to be sure that I'm not making any mistakes.

CREATE TRIGGER insert_t2
AFTER INSERT ON t1
FOR EACH ROW
EXECUTE PROCEDURE insert_t2();

CREATE OR REPLACE FUNCTION insert_t2()
    RETURNS TRIGGER AS $$
    DECLARE
        t2_name_list TEXT [];
        t2_name      TEXT;
    BEGIN
        t2_name_list := split_t1_name(NEW.name);
        FOREACH t2_name IN ARRAY t2_name_list LOOP
            INSERT INTO t2 (t1_id, name)
            VALUES (NEW.id, t2_name);
        END LOOP;
        RETURN NULL;
    END
    $$ LANGUAGE plpgsql;

CREATE OR REPLACE FUNCTION split_t1_name(t1_name TEXT)
    RETURNS TEXT [] AS $$
DECLARE
    NAMES_DELIM CONSTANT    TEXT := ' + ';
    SUBNAMES_DELIM CONSTANT TEXT := '+';
    fullname_list           TEXT [];
    fullname                TEXT;
    t2_name_list            TEXT [];
BEGIN
    IF t1_name IS NULL OR t1_name = '' THEN
        RETURN ARRAY [NULL];
    END IF;

    IF position(NAMES_DELIM IN t1_name) = 0 AND
       position(SUBNAMES_DELIM IN t1_name) = 0 THEN
        RETURN ARRAY [t1_name];
    END IF;

    IF position(NAMES_DELIM IN t1_name) = 0 THEN
        RETURN split_fullname(t1_name, SUBNAMES_DELIM);
    END IF;

    fullname_list := string_to_array(t1_name, NAMES_DELIM);
    FOREACH fullname IN ARRAY fullname_list LOOP
        IF position(SUBNAMES_DELIM IN fullname) <> 0 THEN
            t2_name_list := t2_name_list ||
                            split_fullname(fullname, SUBNAMES_DELIM);
        ELSE
            t2_name_list := array_append(t2_name_list, fullname);
        END IF;
    END LOOP;
    RETURN t2_name_list;
END
$$ LANGUAGE plpgsql IMMUTABLE;

CREATE OR REPLACE FUNCTION split_fullname(fullname TEXT, SUBNAMES_DELIM TEXT)
    RETURNS TEXT [] AS $$
    DECLARE
        BSDELIM CONSTANT TEXT := ' ';
        last_bsdelim_pos INTEGER;
        basename         TEXT;
        subnames         TEXT;
        subname          TEXT;
        subname_list     TEXT [];
        t2_name_list     TEXT [];
    BEGIN
        IF position(BSDELIM IN fullname) = 0 THEN
            RETURN string_to_array(fullname, SUBNAMES_DELIM);
        END IF;
        last_bsdelim_pos := 1 + length(fullname) -
                            position(BSDELIM IN reverse(fullname));
        basename := substring(fullname FROM 0 FOR last_bsdelim_pos);
        subnames := substring(fullname FROM last_bsdelim_pos + 1);
        subname_list := string_to_array(subnames, SUBNAMES_DELIM);
        FOREACH subname IN ARRAY subname_list LOOP
            t2_name_list := array_append(t2_name_list,
                                         basename || BSDELIM || subname);
        END LOOP;
        RETURN t2_name_list;
    END
    $$ LANGUAGE plpgsql IMMUTABLE;

Test examples:

SELECT split_t1_name(NULL)                    = ARRAY [NULL];
SELECT split_t1_name('')                      = ARRAY [NULL];
SELECT split_t1_name('12')                    = ARRAY ['12'];
SELECT split_t1_name('21 A')                  = ARRAY ['21 A'];
SELECT split_t1_name('11+TMH')                = ARRAY ['11', 'TMH'];
SELECT split_t1_name('11+TMH')                = ARRAY ['11', 'TMH'];
SELECT split_t1_name('15 A + 18 B')           = ARRAY ['15 A', '18 B'];
SELECT split_t1_name('VN TD a+b')             = ARRAY ['VN TD a', 'VN TD b'];
SELECT split_t1_name('VN FF a+b + 14 B')      = ARRAY ['VN FF a', 'VN FF b', '14 B'];
SELECT split_t1_name('VC TF a+b + VN TD a+b') = ARRAY ['VC TF a', 'VC TF b', 'VN TD a', 'VN TD b'];
  1. How easy is it to understand what's going on here?
  2. Is it better to use plperl or plpython for these kind of functions?
  3. Would it make code cleaner/easier to read?

I did test this with the plpython implementation and difference in performance wasn't so big:

EXPLAIN ANALYZE SELECT (split_t1_name_python(name)) FROM t1;  
Seq Scan on t1  (cost=0.00..7012.92 rows=25492 width=12) (actual time=0.299..649.970 rows=25492 loops=1)
Total runtime: 1047.019 ms

EXPLAIN ANALYZE SELECT (split_t1_name(name)) FROM t1; 
Seq Scan on t1  (cost=0.00..7012.92 rows=25492 width=12) (actual time=1.011..542.729 rows=25492 loops=1)
Total runtime: 909.536 ms
share|improve this question
    
Comments, comments, comments. What's it for. What's the purpose. Why does this do what it does. Thats's the main thing missing IMO. –  Craig Ringer Jan 21 at 22:43
    
Personally, I would expect split_t1_name(NULL) to return NULL, and split_t1_name('') to return ARRAY[]::TEXT[] (an empty array of TEXT). –  200_success Jan 22 at 4:53
    
@Craig, is it better to comment function by using COMMENT ON command or by creating multiline comment inside it's body? –  nii Jan 22 at 8:11
    
@200_success, take a look at the added code of trigger and trigger function. Sorry, I must've included it from the beginning. –  nii Jan 22 at 8:24
    
I see. Returning an array containing a NULL does make sense in that context. –  200_success Jan 22 at 8:28

Your Answer

 
discard

By posting your answer, you agree to the privacy policy and terms of service.

Browse other questions tagged or ask your own question.