summaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/varlena.c
diff options
context:
space:
mode:
authorPeter Eisentraut2022-09-16 12:51:47 +0000
committerPeter Eisentraut2022-09-16 12:53:12 +0000
commit5ac51c8c9e4434140f4ba45b7bdb38896b48cc64 (patch)
tree1b55802b0fad234d98c2e6ea0488883cd467875f /src/backend/utils/adt/varlena.c
parent1e08576691bf1a25c0e28745e5e800c44f2a1c76 (diff)
Adjust assorted hint messages that list all valid options.
Instead of listing all valid options, we now try to provide one that looks similar. Since this may be useful elsewhere, this change introduces a new set of functions that can be reused for similar purposes.

Author: Nathan Bossart <nathandbossart@gmail.com>
Discussion: https://www.postgresql.org/message-id/flat/b1f9f399-3a1a-b554-283f-4ae7f34608e2@enterprisedb.com
Diffstat (limited to 'src/backend/utils/adt/varlena.c')
-rw-r--r--src/backend/utils/adt/varlena.c82
1 files changed, 82 insertions, 0 deletions
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 816c66b7e77..1f6e0908216 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -6198,6 +6198,88 @@ rest_of_char_same(const char *s1, const char *s2, int len)
/*
+ * The following *ClosestMatch() functions can be used to determine whether a
+ * user-provided string resembles any of the known valid values, which is
+ * useful for providing hints in log messages, among other things.  Use these
+ * functions like so:
+ *
+ *     initClosestMatch(&state, source_string, max_distance);
+ *
+ *     for (int i = 0; i < num_valid_strings; i++)
+ *         updateClosestMatch(&state, valid_strings[i]);
+ *
+ *     closestMatch = getClosestMatch(&state);
+ */
+
+/*
+ * Prepare a ClosestMatchState for a new search.
+ *
+ * 'source' is the string that candidates will be compared against, and
+ * 'max_d' is the largest Levenshtein distance that still counts as a match
+ * (must be >= 0).  Only the pointer to 'source' is stored, not a copy, so the
+ * string has to stay valid for as long as the state is used.
+ *
+ * The state starts out with no match recorded: 'match' is NULL and 'min_d'
+ * holds the sentinel -1, meaning "no distance recorded yet".
+ */
+void
+initClosestMatch(ClosestMatchState *state, const char *source, int max_d)
+{
+    Assert(state);
+    Assert(max_d >= 0);
+
+    /* Nothing has been matched so far. */
+    state->match = NULL;
+    state->min_d = -1;
+
+    /* Remember what we are searching for and how far we may stray. */
+    state->max_d = max_d;
+    state->source = source;
+}
+
+/*
+ * Offer one candidate string to the search.
+ *
+ * The candidate replaces the currently saved match only if it is strictly
+ * closer to the source string than the saved one (or nothing is saved yet),
+ * so among candidates at equal distance the first one offered is kept.  Only
+ * the candidate pointer is saved, not a copy of the string.
+ *
+ * The call is a no-op when:
+ *   - the source or the candidate is NULL or empty;
+ *   - either string is longer than MAX_LEVENSHTEIN_STRLEN bytes;
+ *   - the computed distance exceeds state->max_d;
+ *   - the computed distance exceeds half of the source's length in bytes
+ *     (integer division, so a one-byte source only accepts distance 0).
+ */
+void
+updateClosestMatch(ClosestMatchState *state, const char *candidate)
+{
+    const char *src;
+    size_t      src_len;
+    size_t      cand_len;
+    int         d;
+
+    Assert(state);
+
+    src = state->source;
+
+    /* Nothing to compare if either side is missing or empty. */
+    if (src == NULL || *src == '\0')
+        return;
+    if (candidate == NULL || *candidate == '\0')
+        return;
+
+    src_len = strlen(src);
+    cand_len = strlen(candidate);
+
+    /*
+     * Over-long strings are rejected here, so that we can pass 'trusted' as
+     * true below instead of letting varstr_levenshtein_less_equal() raise an
+     * ERROR for them.
+     */
+    if (src_len > MAX_LEVENSHTEIN_STRLEN)
+        return;
+    if (cand_len > MAX_LEVENSHTEIN_STRLEN)
+        return;
+
+    /*
+     * The three 1s are presumably the insert/delete/substitute costs --
+     * confirm against varstr_levenshtein_less_equal().
+     */
+    d = varstr_levenshtein_less_equal(src, src_len,
+                                      candidate, cand_len,
+                                      1, 1, 1,
+                                      state->max_d, true);
+
+    /* Too far away in absolute terms. */
+    if (d > state->max_d)
+        return;
+
+    /* Too far away relative to the length of the source string. */
+    if (d > src_len / 2)
+        return;
+
+    /* Not strictly better than what we already have. */
+    if (state->min_d != -1 && d >= state->min_d)
+        return;
+
+    state->match = candidate;
+    state->min_d = d;
+}
+
+/*
+ * Fetch the result of the search.
+ *
+ * Returns the candidate pointer most recently saved by updateClosestMatch(),
+ * or NULL if no candidate has been accepted since initClosestMatch().
+ */
+const char *
+getClosestMatch(ClosestMatchState *state)
+{
+    const char *best;
+
+    Assert(state);
+
+    best = state->match;
+    return best;
+}
+
+
+/*
* Unicode support
*/