summaryrefslogtreecommitdiff
path: root/portal/tools/search/sql/functions.sql
blob: 2446c257cd4f6e8a56098842ce9af303dc04169e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
DROP TYPE IF EXISTS _archives_search_ret CASCADE;
CREATE TYPE _archives_search_ret AS (listname text, year int, month int, msgnum int, date timestamptz, subject text, author text, headline text, rank float);
CREATE FUNCTION archives_search(query text, _lists int, firstdate timestamptz, lastdate timestamptz, startofs int, hitsperpage int, sort char) RETURNS SETOF _archives_search_ret AS $$
DECLARE
   tsq tsquery;
   qry text;
   hits int;
   hit RECORD;
   det _archives_search_ret;
   curs refcursor;
   pagecount int;
   listary int[];
BEGIN
   tsq := plainto_tsquery(query);
   IF numnode(tsq) = 0 THEN
      det = (NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL);
      RETURN NEXT det;
      RETURN;
   END IF;

   hits := 0;

   IF _lists IS NULL THEN
      SELECT INTO pagecount sum(lists.pagecount) FROM lists;
      IF sort = 'd' THEN
         OPEN curs FOR SELECT list,year,month,msgnum,ts_rank_cd(fti,tsq) FROM messages WHERE fti @@ tsq AND date>COALESCE(firstdate,'1900-01-01') ORDER BY date DESC LIMIT 1000;
      ELSE
         OPEN curs FOR SELECT list,year,month,msgnum,ts_rank_cd(fti,tsq) FROM messages WHERE fti @@ tsq AND date>COALESCE(firstdate,'1900-01-01') ORDER BY ts_rank_cd(fti,tsq) DESC LIMIT 1000;
      END IF;
   ELSE
      IF _lists < 0 THEN
         SELECT INTO listary ARRAY(SELECT id FROM lists WHERE grp=-_lists);
      ELSE
         listary = ARRAY[_lists];
      END IF;
      SELECT INTO pagecount sum(lists.pagecount) FROM lists WHERE id=ANY(listary);
      IF sort = 'd' THEN
         OPEN curs FOR SELECT list,year,month,msgnum,ts_rank_cd(fti,tsq) FROM messages WHERE (list=ANY(listary)) AND fti @@ tsq AND date>COALESCE(firstdate,'1900-01-01') ORDER BY date DESC LIMIT 1000;
      ELSE
         OPEN curs FOR SELECT list,year,month,msgnum,ts_rank_cd(fti,tsq) FROM messages WHERE (list=ANY(listary)) AND fti @@ tsq AND date>COALESCE(firstdate,'1900-01-01') ORDER BY ts_rank_cd(fti,tsq) DESC LIMIT 1000;
      END IF;
   END IF;
   LOOP
      FETCH curs INTO hit;
      IF NOT FOUND THEN
         EXIT;
      END IF;
      hits := hits+1;
      IF (hits < startofs+1) OR (hits > startofs + hitsperpage) THEN
         CONTINUE;
      END IF;
      SELECT INTO det lists.name, hit.year, hit.month, hit.msgnum, messages.date, messages.subject, messages.author, ts_headline(messages.txt,tsq,'StartSel="[[[[[[",StopSel="]]]]]]"'), hit.ts_rank_cd FROM messages INNER JOIN lists ON messages.list=lists.id WHERE messages.list=hit.list AND messages.year=hit.year AND messages.month=hit.month AND messages.msgnum=hit.msgnum;

      RETURN NEXT det;
   END LOOP;
   det=(NULL,NULL,NULL,NULL,NULL,NULL,NULL);
   det.year=hits;
   det.month=pagecount;
   RETURN NEXT det;
   IF startofs=0 THEN
      INSERT INTO search_stats VALUES (CURRENT_TIMESTAMP,'A',hits,tsq);
   END IF;
END;
$$
LANGUAGE 'plpgsql';


DROP TYPE IF EXISTS _site_search_ret CASCADE;
CREATE TYPE _site_search_ret AS (siteid int, baseurl text, suburl text, title text, headline text, rank float);
CREATE FUNCTION site_search(query text, startofs int, hitsperpage int, allsites bool, _suburl text)
RETURNS SETOF _site_search_ret AS $$
DECLARE
    tsq tsquery;
    qry text;
    hits int;
    hit RECORD;
    det _site_search_ret;
    curs refcursor;
    pagecount int;
BEGIN
    tsq := plainto_tsquery(query);
    IF numnode(tsq) = 0 THEN
        det = (0, NULL, NULL, NULL, NULL, 0);
        RETURN NEXT det;
        RETURN;
    END IF;

    hits := 0;

    IF allsites THEN
        SELECT INTO pagecount sum(sites.pagecount) FROM sites;
        OPEN curs FOR SELECT sites.id AS siteid, sites.baseurl, webpages.suburl, ts_rank_cd(fti,tsq) FROM webpages INNER JOIN sites ON webpages.site=sites.id WHERE fti @@ tsq ORDER BY ts_rank_cd(fti,tsq) DESC LIMIT 1000;
    ELSE
        SELECT INTO pagecount sites.pagecount FROM sites WHERE id=1;
        IF _suburl IS NULL THEN
            OPEN curs FOR SELECT sites.id AS siteid, sites.baseurl, webpages.suburl, ts_rank_cd(fti,tsq) FROM webpages INNER JOIN sites ON webpages.site=sites.id WHERE fti @@ tsq AND site=1 ORDER BY ts_rank_cd(fti,tsq) DESC LIMIT 1000;
        ELSE
            OPEN curs FOR SELECT sites.id AS siteid, sites.baseurl, webpages.suburl, ts_rank_cd(fti,tsq) FROM webpages INNER JOIN sites ON webpages.site=sites.id WHERE fti @@ tsq AND site=1 AND suburl LIKE _suburl||'%' ORDER BY ts_rank_cd(fti,tsq) DESC LIMIT 1000;
        END IF;
    END IF;
    LOOP
       FETCH curs INTO hit;
       IF NOT FOUND THEN
          EXIT;
       END IF;
       hits := hits+1;
       IF (hits < startofs+1) OR (hits > startofs+hitsperpage) THEN
          CONTINUE;
       END IF;
       SELECT INTO det hit.siteid, hit.baseurl, hit.suburl, title, ts_headline(webpages.txt,tsq,'StartSel="[[[[[[",StopSel="]]]]]]"'), hit.ts_rank_cd FROM webpages WHERE webpages.site=hit.siteid AND webpages.suburl=hit.suburl;
       RETURN NEXT det;
    END LOOP;
    det=(NULL,NULL,NULL,NULL,NULL);
    det.rank = hits;
    det.siteid = pagecount;
    RETURN NEXT det;
    IF startofs=0 THEN
       INSERT INTO search_stats VALUES (CURRENT_TIMESTAMP,'W',hits,tsq);
    END IF;
END;
$$
LANGUAGE 'plpgsql';