summary refs log tree commit diff stats
path: root/chromium/net/base/escape_unittest.cc
diff options
context:
space:
mode:
author Zeno Albisser <zeno.albisser@digia.com> 2013-08-15 21:46:11 +0200
committer Zeno Albisser <zeno.albisser@digia.com> 2013-08-15 21:46:11 +0200
commit 679147eead574d186ebf3069647b4c23e8ccace6 (patch)
tree fc247a0ac8ff119f7c8550879ebb6d3dd8d1ff69 /chromium/net/base/escape_unittest.cc
Initial import.
Diffstat (limited to 'chromium/net/base/escape_unittest.cc')
-rw-r--r-- chromium/net/base/escape_unittest.cc 430
1 files changed, 430 insertions, 0 deletions
diff --git a/chromium/net/base/escape_unittest.cc b/chromium/net/base/escape_unittest.cc
new file mode 100644
index 00000000000..bed49a5e1d5
--- /dev/null
+++ b/chromium/net/base/escape_unittest.cc
@@ -0,0 +1,430 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <algorithm>
+#include <string>
+
+#include "net/base/escape.h"
+
+#include "base/basictypes.h"
+#include "base/i18n/icu_string_conversions.h"
+#include "base/strings/string_util.h"
+#include "base/strings/stringprintf.h"
+#include "base/strings/utf_string_conversions.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace net {
+namespace {
+
+const size_t kNpos = base::string16::npos;  // Expected value for offsets that have no position after adjustment.
+
+struct EscapeCase {  // Test vector for the escaping tests.
+ const char* input;  // Raw text handed to the escaping function.
+ const char* output;  // Escaped text the function is expected to return.
+};
+
+struct UnescapeURLCase {  // Wide-string test vector for UnescapeURLComponent.
+ const wchar_t* input;  // Escaped text; converted to UTF-16 before the call.
+ UnescapeRule::Type rules;  // Unescape rule flags passed along with |input|.
+ const wchar_t* output;  // Expected result; converted to UTF-16 to compare.
+};
+
+struct UnescapeURLCaseASCII {  // Narrow-string test vector for UnescapeURLComponent.
+ const char* input;  // Escaped text passed to the call as a std::string.
+ UnescapeRule::Type rules;  // Unescape rule flags passed along with |input|.
+ const char* output;  // Expected result of the call.
+};
+
+struct UnescapeAndDecodeCase {  // One input with the expected result of three different calls.
+ const char* input;  // Escaped text fed to every call under test.
+
+ // The expected output of UnescapeURLComponent with UnescapeRule::NORMAL.
+ const char* url_unescaped;
+
+ // The expected output of UnescapeURLComponent with REPLACE_PLUS_WITH_SPACE.
+ const char* query_unescaped;
+
+ // The expected output of UnescapeAndDecodeUTF8URLComponent (NORMAL rules).
+ const wchar_t* decoded;
+};
+
+struct AdjustOffsetCase {  // Test vector for offset adjustment during unescaping.
+ const char* input;  // Escaped text passed to UnescapeAndDecodeUTF8URLComponent.
+ size_t input_offset;  // Offset handed to the call through its offset pointer.
+ size_t output_offset;  // Expected offset after the call (npos in some cases).
+};
+
+struct EscapeForHTMLCase {  // Test vector for EscapeForHTML and UnescapeForHTML.
+ const char* input;  // Text handed to the function under test.
+ const char* expected_output;  // Text the function is expected to return.
+};
+
+// Verifies EscapeQueryParamValue() for a few fixed strings, with the second
+// argument both true (space -> '+') and false (space -> "%20"), and then
+// sweeps all 256 byte values to check exactly which ones get %-escaped.
+TEST(EscapeTest, EscapeTextForFormSubmission) {
+  const EscapeCase escape_cases[] = {
+    {"foo", "foo"},
+    {"foo bar", "foo+bar"},
+    {"foo++", "foo%2B%2B"}
+  };
+  for (size_t i = 0; i < arraysize(escape_cases); ++i) {
+    const EscapeCase& value = escape_cases[i];
+    EXPECT_EQ(value.output, EscapeQueryParamValue(value.input, true));
+  }
+
+  const EscapeCase escape_cases_no_plus[] = {
+    {"foo", "foo"},
+    {"foo bar", "foo%20bar"},
+    {"foo++", "foo%2B%2B"}
+  };
+  for (size_t i = 0; i < arraysize(escape_cases_no_plus); ++i) {
+    const EscapeCase& value = escape_cases_no_plus[i];
+    EXPECT_EQ(value.output, EscapeQueryParamValue(value.input, false));
+  }
+
+  // Test all the characters we're supposed to be escaping.
+  const std::string no_escape(
+      "abcdefghijklmnopqrstuvwxyz"
+      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+      "0123456789"
+      "!'()*-._~");
+  for (int i = 0; i < 256; ++i) {
+    std::string in;
+    in.push_back(static_cast<char>(i));
+    std::string out = EscapeQueryParamValue(in, true);
+    // Expected value goes first in every check below, matching the other
+    // tests in this file, so a failure labels the two values correctly.
+    if (0 == i) {
+      EXPECT_EQ(std::string("%00"), out);
+    } else if (32 == i) {
+      // Spaces are plus escaped like web forms.
+      EXPECT_EQ(std::string("+"), out);
+    } else if (no_escape.find(in) == std::string::npos) {
+      // Check %hex escaping
+      std::string expected = base::StringPrintf("%%%02X", i);
+      EXPECT_EQ(expected, out);
+    } else {
+      // No change for things in the no_escape list.
+      EXPECT_EQ(in, out);
+    }
+  }
+}
+
+// Checks EscapePath() against one sample string that covers control
+// characters, ASCII punctuation, alphanumerics and high-bit bytes.
+TEST(EscapeTest, EscapePath) {
+  // Expected value first, value under test second, as in the other tests in
+  // this file, so a failure labels the two values correctly.
+  ASSERT_EQ(
+      // Escaped
+      "%02%0A%1D%20!%22%23$%25&'()*+,-./0123456789%3A;"
+      "%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+      "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz"
+      "%7B%7C%7D~%7F%80%FF",
+      // Most of the character space we care about, un-escaped
+      EscapePath(
+          "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
+          "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+          "[\\]^_`abcdefghijklmnopqrstuvwxyz"
+          "{|}~\x7f\x80\xff"));
+}
+
+// Checks EscapeUrlEncodedData() (second argument true, so a space becomes
+// '+') against one sample string that covers control characters, ASCII
+// punctuation, alphanumerics and high-bit bytes.
+TEST(EscapeTest, EscapeUrlEncodedData) {
+  // Expected value first, value under test second, as in the other tests in
+  // this file, so a failure labels the two values correctly.
+  ASSERT_EQ(
+      // Escaped
+      "%02%0A%1D+!%22%23%24%25%26%27()*%2B,-./0123456789:%3B"
+      "%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+      "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz"
+      "%7B%7C%7D~%7F%80%FF",
+      // Most of the character space we care about, un-escaped
+      EscapeUrlEncodedData(
+          "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
+          "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+          "[\\]^_`abcdefghijklmnopqrstuvwxyz"
+          "{|}~\x7f\x80\xff", true));
+}
+
+// Checks how EscapeUrlEncodedData() encodes a space: '+' when the second
+// argument is true, "%20" when it is false.
+TEST(EscapeTest, EscapeUrlEncodedDataSpace) {
+  // The two checks are independent, so use the non-fatal form: a failure in
+  // the first must not hide the result of the second.
+  EXPECT_EQ("a+b", EscapeUrlEncodedData("a b", true));
+  EXPECT_EQ("a%20b", EscapeUrlEncodedData("a b", false));
+}
+
+// Runs the narrow-string overload of UnescapeURLComponent() over a table of
+// (input, rules, expected) triples, then checks strings with embedded NULs.
+TEST(EscapeTest, UnescapeURLComponentASCII) {
+  const UnescapeURLCaseASCII unescape_cases[] = {
+    {"", UnescapeRule::NORMAL, ""},
+    {"%2", UnescapeRule::NORMAL, "%2"},
+    {"%%%%%%", UnescapeRule::NORMAL, "%%%%%%"},
+    {"Don't escape anything", UnescapeRule::NORMAL, "Don't escape anything"},
+    {"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"},
+    {"Some%20random text %25%2dOK", UnescapeRule::NONE,
+     "Some%20random text %25%2dOK"},
+    {"Some%20random text %25%2dOK", UnescapeRule::NORMAL,
+     "Some%20random text %25-OK"},
+    {"Some%20random text %25%2dOK", UnescapeRule::SPACES,
+     "Some random text %25-OK"},
+    {"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS,
+     "Some%20random text %-OK"},
+    {"Some%20random text %25%2dOK",
+     UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
+     "Some random text %-OK"},
+    {"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, "\xA0\xB1\xC2\xD3\xE4\xF5"},
+    {"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, "\xAa\xBb\xCc\xDd\xEe\xFf"},
+    // Certain URL-sensitive characters should not be unescaped unless asked.
+    {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES,
+     "Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"},
+    {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+",
+     UnescapeRule::URL_SPECIAL_CHARS,
+     "Hello%20%13%10world ## ?? == && %% ++"},
+    // We can neither escape nor unescape '@' since some websites expect it to
+    // be preserved as either '@' or "%40".
+    // See http://b/996720 and http://crbug.com/23933 .
+    {"me@my%40example", UnescapeRule::NORMAL, "me@my%40example"},
+    // Control characters.
+    {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS,
+     "%01%02%03%04%05%06%07%08%09 %"},
+    {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS,
+     "\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"},
+    {"Hello%20%13%10%02", UnescapeRule::SPACES, "Hello %13%10%02"},
+    {"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS, "Hello%20\x13\x10\x02"},
+  };
+
+  for (size_t index = 0; index < arraysize(unescape_cases); ++index) {
+    const UnescapeURLCaseASCII& test_case = unescape_cases[index];
+    const std::string escaped(test_case.input);
+    const std::string wanted(test_case.output);
+    EXPECT_EQ(wanted, UnescapeURLComponent(escaped, test_case.rules));
+  }
+
+  // Embedded NULs cannot be expressed in the table above because its entries
+  // are plain char pointers, so those strings are assembled by hand here.
+  // The input carries a literal NUL as well as an escaped one.
+  const std::string with_null = std::string("Null") + '\0' + "%00%39Test";
+
+  // With CONTROL_CHARS the escaped NUL is expected to be decoded.
+  const std::string nulls_decoded =
+      std::string("Null") + '\0' + '\0' + "9Test";
+  EXPECT_EQ(nulls_decoded,
+            UnescapeURLComponent(with_null, UnescapeRule::CONTROL_CHARS));
+
+  // With NORMAL the escaped NUL is expected to stay as "%00".
+  const std::string nulls_kept = std::string("Null") + '\0' + "%009Test";
+  EXPECT_EQ(nulls_kept, UnescapeURLComponent(with_null, UnescapeRule::NORMAL));
+}
+
+// Runs the UTF-16 overload of UnescapeURLComponent() over a table of wide
+// (input, rules, expected) triples, then checks strings with embedded NULs.
+TEST(EscapeTest, UnescapeURLComponent) {
+  const UnescapeURLCase unescape_cases[] = {
+    {L"", UnescapeRule::NORMAL, L""},
+    {L"%2", UnescapeRule::NORMAL, L"%2"},
+    {L"%%%%%%", UnescapeRule::NORMAL, L"%%%%%%"},
+    {L"Don't escape anything", UnescapeRule::NORMAL, L"Don't escape anything"},
+    {L"Invalid %escape %2", UnescapeRule::NORMAL, L"Invalid %escape %2"},
+    {L"Some%20random text %25%2dOK", UnescapeRule::NONE,
+     L"Some%20random text %25%2dOK"},
+    {L"Some%20random text %25%2dOK", UnescapeRule::NORMAL,
+     L"Some%20random text %25-OK"},
+    {L"Some%20random text %25%2dOK", UnescapeRule::SPACES,
+     L"Some random text %25-OK"},
+    {L"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS,
+     L"Some%20random text %-OK"},
+    {L"Some%20random text %25%2dOK",
+     UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
+     L"Some random text %-OK"},
+    {L"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, L"\xA0\xB1\xC2\xD3\xE4\xF5"},
+    {L"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, L"\xAa\xBb\xCc\xDd\xEe\xFf"},
+    // Certain URL-sensitive characters should not be unescaped unless asked.
+    {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES,
+     L"Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"},
+    {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+",
+     UnescapeRule::URL_SPECIAL_CHARS,
+     L"Hello%20%13%10world ## ?? == && %% ++"},
+    // We can neither escape nor unescape '@' since some websites expect it to
+    // be preserved as either '@' or "%40".
+    // See http://b/996720 and http://crbug.com/23933 .
+    {L"me@my%40example", UnescapeRule::NORMAL, L"me@my%40example"},
+    // Control characters.
+    {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS,
+     L"%01%02%03%04%05%06%07%08%09 %"},
+    {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS,
+     L"\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"},
+    {L"Hello%20%13%10%02", UnescapeRule::SPACES, L"Hello %13%10%02"},
+    {L"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS,
+     L"Hello%20\x13\x10\x02"},
+    {L"Hello\x9824\x9827", UnescapeRule::CONTROL_CHARS,
+     L"Hello\x9824\x9827"},
+  };
+
+  for (size_t index = 0; index < arraysize(unescape_cases); ++index) {
+    const UnescapeURLCase& test_case = unescape_cases[index];
+    const base::string16 escaped(WideToUTF16(test_case.input));
+    const base::string16 wanted(WideToUTF16(test_case.output));
+    EXPECT_EQ(wanted, UnescapeURLComponent(escaped, test_case.rules));
+  }
+
+  // Embedded NULs cannot be expressed in the table above because its entries
+  // are plain character pointers, so those strings are assembled by hand.
+  // The input carries a literal NUL as well as an escaped one.
+  base::string16 with_null = WideToUTF16(L"Null");
+  with_null.push_back(0);
+  with_null += WideToUTF16(L"%00%39Test");
+
+  // With CONTROL_CHARS the escaped NUL is expected to be decoded.
+  base::string16 nulls_decoded = WideToUTF16(L"Null");
+  nulls_decoded.push_back(0);
+  nulls_decoded.push_back(0);
+  nulls_decoded += ASCIIToUTF16("9Test");
+  EXPECT_EQ(nulls_decoded,
+            UnescapeURLComponent(with_null, UnescapeRule::CONTROL_CHARS));
+
+  // With NORMAL the escaped NUL is expected to stay as "%00".
+  base::string16 nulls_kept = WideToUTF16(L"Null");
+  nulls_kept.push_back(0);
+  nulls_kept += WideToUTF16(L"%009Test");
+  EXPECT_EQ(nulls_kept, UnescapeURLComponent(with_null, UnescapeRule::NORMAL));
+}
+
+// For each input, checks three results: UnescapeURLComponent() with NORMAL,
+// UnescapeURLComponent() with REPLACE_PLUS_WITH_SPACE, and
+// UnescapeAndDecodeUTF8URLComponent() with NORMAL.
+TEST(EscapeTest, UnescapeAndDecodeUTF8URLComponent) {
+  const UnescapeAndDecodeCase unescape_cases[] = {
+    { "%",
+      "%",
+      "%",
+     L"%"},
+    { "+",
+      "+",
+      " ",
+     L"+"},
+    { "%2+",
+      "%2+",
+      "%2 ",
+     L"%2+"},
+    { "+%%%+%%%",
+      "+%%%+%%%",
+      " %%% %%%",
+     L"+%%%+%%%"},
+    { "Don't escape anything",
+      "Don't escape anything",
+      "Don't escape anything",
+     L"Don't escape anything"},
+    { "+Invalid %escape %2+",
+      "+Invalid %escape %2+",
+      " Invalid %escape %2 ",
+     L"+Invalid %escape %2+"},
+    { "Some random text %25%2dOK",
+      "Some random text %25-OK",
+      "Some random text %25-OK",
+     L"Some random text %25-OK"},
+    { "%01%02%03%04%05%06%07%08%09",
+      "%01%02%03%04%05%06%07%08%09",
+      "%01%02%03%04%05%06%07%08%09",
+     L"%01%02%03%04%05%06%07%08%09"},
+    { "%E4%BD%A0+%E5%A5%BD",
+      "\xE4\xBD\xA0+\xE5\xA5\xBD",
+      "\xE4\xBD\xA0 \xE5\xA5\xBD",
+     L"\x4f60+\x597d"},
+    { "%ED%ED",  // Invalid UTF-8.
+      "\xED\xED",
+      "\xED\xED",
+     L"%ED%ED"},  // Invalid UTF-8 -> kept unescaped.
+  };
+
+  for (size_t index = 0; index < arraysize(unescape_cases); ++index) {
+    const UnescapeAndDecodeCase& test_case = unescape_cases[index];
+
+    // Plain unescaping: '+' is left alone.
+    const std::string as_url =
+        UnescapeURLComponent(test_case.input, UnescapeRule::NORMAL);
+    EXPECT_EQ(std::string(test_case.url_unescaped), as_url);
+
+    // Query-style unescaping: '+' turns into a space.
+    const std::string as_query = UnescapeURLComponent(
+        test_case.input, UnescapeRule::REPLACE_PLUS_WITH_SPACE);
+    EXPECT_EQ(std::string(test_case.query_unescaped), as_query);
+
+    // TODO: Need to test unescape_spaces and unescape_percent.
+    const base::string16 as_utf16 = UnescapeAndDecodeUTF8URLComponent(
+        test_case.input, UnescapeRule::NORMAL, NULL);
+    EXPECT_EQ(WideToUTF16(test_case.decoded), as_utf16);
+  }
+}
+
+// Checks the offset that UnescapeAndDecodeUTF8URLComponent() writes back
+// through its offset pointer for a range of inputs and starting offsets.
+TEST(EscapeTest, AdjustOffset) {
+  const AdjustOffsetCase adjust_cases[] = {
+    {"", 0, std::string::npos},
+    {"test", 0, 0},
+    {"test", 2, 2},
+    {"test", 4, std::string::npos},
+    {"test", std::string::npos, std::string::npos},
+    {"%2dtest", 6, 4},
+    {"%2dtest", 2, std::string::npos},
+    {"test%2d", 2, 2},
+    {"%E4%BD%A0+%E5%A5%BD", 9, 1},
+    {"%E4%BD%A0+%E5%A5%BD", 6, std::string::npos},
+    {"%ED%B0%80+%E5%A5%BD", 6, 6},
+  };
+
+  for (size_t index = 0; index < arraysize(adjust_cases); ++index) {
+    const AdjustOffsetCase& test_case = adjust_cases[index];
+    // The call updates |adjusted| in place; its return value is not checked.
+    size_t adjusted = test_case.input_offset;
+    UnescapeAndDecodeUTF8URLComponent(test_case.input, UnescapeRule::NORMAL,
+                                      &adjusted);
+    EXPECT_EQ(test_case.output_offset, adjusted);
+  }
+}
+
+// Checks EscapeForHTML() on plain text, angle brackets and an apostrophe.
+TEST(EscapeTest, EscapeForHTML) {
+  const EscapeForHTMLCase tests[] = {
+    { "hello", "hello" },
+    { "<hello>", "&lt;hello&gt;" },
+    { "don\'t mess with me", "don&#39;t mess with me" },
+  };
+  for (size_t index = 0; index < arraysize(tests); ++index) {
+    const EscapeForHTMLCase& test_case = tests[index];
+    const std::string wanted(test_case.expected_output);
+    EXPECT_EQ(wanted, EscapeForHTML(std::string(test_case.input)));
+  }
+}
+
+// Checks UnescapeForHTML() on the five entities used in the table below, and
+// on malformed entity-like text that is expected to come back unchanged.
+TEST(EscapeTest, UnescapeForHTML) {
+  const EscapeForHTMLCase tests[] = {
+    { "", "" },
+    { "&lt;hello&gt;", "<hello>" },
+    { "don&#39;t mess with me", "don\'t mess with me" },
+    { "&lt;&gt;&amp;&quot;&#39;", "<>&\"'" },
+    { "& lt; &amp ; &; '", "& lt; &amp ; &; '" },
+    { "&amp;", "&" },
+    { "&quot;", "\"" },
+    { "&#39;", "'" },
+    { "&lt;", "<" },
+    { "&gt;", ">" },
+    { "&amp; &", "& &" },
+  };
+  for (size_t index = 0; index < arraysize(tests); ++index) {
+    const EscapeForHTMLCase& test_case = tests[index];
+    const base::string16 unescaped =
+        UnescapeForHTML(ASCIIToUTF16(test_case.input));
+    EXPECT_EQ(ASCIIToUTF16(test_case.expected_output), unescaped);
+  }
+}
+
+// Applies the internal::AdjustEncodingOffset functor to every offset of an
+// escaped string and checks the results. Offsets at or before the start of a
+// %XX sequence map to a position in the unescaped text, offsets inside a
+// sequence become kNpos, and offsets after it shift down by two per sequence.
+TEST(EscapeTest, AdjustEncodingOffset) {
+  // Imagine we have strings as shown in the following cases where the
+  // %XX's represent encoded characters
+
+  // 1: abc%ECdef ==> abcXdef
+  std::vector<size_t> offsets;
+  for (size_t t = 0; t < 9; ++t)
+    offsets.push_back(t);
+  internal::AdjustEncodingOffset::Adjustments adjustments;
+  adjustments.push_back(3);
+  std::for_each(offsets.begin(), offsets.end(),
+                internal::AdjustEncodingOffset(adjustments));
+  const size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6};
+  // Fatal on mismatch: the loop below indexes |offsets| by the length of
+  // |expected_1|, so a shorter |offsets| would be read out of bounds.
+  ASSERT_EQ(arraysize(expected_1), offsets.size());
+  for (size_t i = 0; i < arraysize(expected_1); ++i)
+    EXPECT_EQ(expected_1[i], offsets[i]);
+
+
+  // 2: %ECabc%EC%ECdef%EC ==> XabcXXdefX
+  offsets.clear();
+  for (size_t t = 0; t < 18; ++t)
+    offsets.push_back(t);
+  adjustments.clear();
+  adjustments.push_back(0);
+  adjustments.push_back(6);
+  adjustments.push_back(9);
+  adjustments.push_back(15);
+  std::for_each(offsets.begin(), offsets.end(),
+                internal::AdjustEncodingOffset(adjustments));
+  const size_t expected_2[] = {0, kNpos, kNpos, 1, 2, 3, 4, kNpos, kNpos, 5,
+                               kNpos, kNpos, 6, 7, 8, 9, kNpos, kNpos};
+  // Fatal for the same reason as the first size check.
+  ASSERT_EQ(arraysize(expected_2), offsets.size());
+  for (size_t i = 0; i < arraysize(expected_2); ++i)
+    EXPECT_EQ(expected_2[i], offsets[i]);
+}
+
+} // namespace
+} // namespace net