diff options
| author | Zeno Albisser <zeno.albisser@digia.com> | 2013-08-15 21:46:11 +0200 |
|---|---|---|
| committer | Zeno Albisser <zeno.albisser@digia.com> | 2013-08-15 21:46:11 +0200 |
| commit | 679147eead574d186ebf3069647b4c23e8ccace6 (patch) | |
| tree | fc247a0ac8ff119f7c8550879ebb6d3dd8d1ff69 /chromium/net/base/escape_unittest.cc | |
Initial import.
Diffstat (limited to 'chromium/net/base/escape_unittest.cc')
| -rw-r--r-- | chromium/net/base/escape_unittest.cc | 430 |
1 files changed, 430 insertions, 0 deletions
diff --git a/chromium/net/base/escape_unittest.cc b/chromium/net/base/escape_unittest.cc new file mode 100644 index 00000000000..bed49a5e1d5 --- /dev/null +++ b/chromium/net/base/escape_unittest.cc @@ -0,0 +1,430 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <algorithm> +#include <string> + +#include "net/base/escape.h" + +#include "base/basictypes.h" +#include "base/i18n/icu_string_conversions.h" +#include "base/strings/string_util.h" +#include "base/strings/stringprintf.h" +#include "base/strings/utf_string_conversions.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace net { +namespace { + +const size_t kNpos = base::string16::npos; + +struct EscapeCase { + const char* input; + const char* output; +}; + +struct UnescapeURLCase { + const wchar_t* input; + UnescapeRule::Type rules; + const wchar_t* output; +}; + +struct UnescapeURLCaseASCII { + const char* input; + UnescapeRule::Type rules; + const char* output; +}; + +struct UnescapeAndDecodeCase { + const char* input; + + // The expected output when run through UnescapeURL. + const char* url_unescaped; + + // The expected output when run through UnescapeQuery. + const char* query_unescaped; + + // The expected output when run through UnescapeAndDecodeURLComponent. + const wchar_t* decoded; +}; + +struct AdjustOffsetCase { + const char* input; + size_t input_offset; + size_t output_offset; +}; + +struct EscapeForHTMLCase { + const char* input; + const char* expected_output; +}; + +TEST(EscapeTest, EscapeTextForFormSubmission) { + const EscapeCase escape_cases[] = { + {"foo", "foo"}, + {"foo bar", "foo+bar"}, + {"foo++", "foo%2B%2B"} + }; + for (size_t i = 0; i < arraysize(escape_cases); ++i) { + EscapeCase value = escape_cases[i]; + EXPECT_EQ(value.output, EscapeQueryParamValue(value.input, true)); + } + + const EscapeCase escape_cases_no_plus[] = { + {"foo", "foo"}, + {"foo bar", "foo%20bar"}, + {"foo++", "foo%2B%2B"} + }; + for (size_t i = 0; i < arraysize(escape_cases_no_plus); ++i) { + EscapeCase value = escape_cases_no_plus[i]; + EXPECT_EQ(value.output, EscapeQueryParamValue(value.input, false)); + } + + // Test all the values in we're supposed to be escaping. + const std::string no_escape( + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "0123456789" + "!'()*-._~"); + for (int i = 0; i < 256; ++i) { + std::string in; + in.push_back(i); + std::string out = EscapeQueryParamValue(in, true); + if (0 == i) { + EXPECT_EQ(out, std::string("%00")); + } else if (32 == i) { + // Spaces are plus escaped like web forms. + EXPECT_EQ(out, std::string("+")); + } else if (no_escape.find(in) == std::string::npos) { + // Check %hex escaping + std::string expected = base::StringPrintf("%%%02X", i); + EXPECT_EQ(expected, out); + } else { + // No change for things in the no_escape list. + EXPECT_EQ(out, in); + } + } +} + +TEST(EscapeTest, EscapePath) { + ASSERT_EQ( + // Most of the character space we care about, un-escaped + EscapePath( + "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;" + "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "[\\]^_`abcdefghijklmnopqrstuvwxyz" + "{|}~\x7f\x80\xff"), + // Escaped + "%02%0A%1D%20!%22%23$%25&'()*+,-./0123456789%3A;" + "%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz" + "%7B%7C%7D~%7F%80%FF"); +} + +TEST(EscapeTest, EscapeUrlEncodedData) { + ASSERT_EQ( + // Most of the character space we care about, un-escaped + EscapeUrlEncodedData( + "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;" + "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "[\\]^_`abcdefghijklmnopqrstuvwxyz" + "{|}~\x7f\x80\xff", true), + // Escaped + "%02%0A%1D+!%22%23%24%25%26%27()*%2B,-./0123456789:%3B" + "%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz" + "%7B%7C%7D~%7F%80%FF"); +} + +TEST(EscapeTest, EscapeUrlEncodedDataSpace) { + ASSERT_EQ(EscapeUrlEncodedData("a b", true), "a+b"); + ASSERT_EQ(EscapeUrlEncodedData("a b", false), "a%20b"); +} + +TEST(EscapeTest, UnescapeURLComponentASCII) { + const UnescapeURLCaseASCII unescape_cases[] = { + {"", UnescapeRule::NORMAL, ""}, + {"%2", UnescapeRule::NORMAL, "%2"}, + {"%%%%%%", UnescapeRule::NORMAL, "%%%%%%"}, + {"Don't escape anything", UnescapeRule::NORMAL, "Don't escape anything"}, + {"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"}, + {"Some%20random text %25%2dOK", UnescapeRule::NONE, + "Some%20random text %25%2dOK"}, + {"Some%20random text %25%2dOK", UnescapeRule::NORMAL, + "Some%20random text %25-OK"}, + {"Some%20random text %25%2dOK", UnescapeRule::SPACES, + "Some random text %25-OK"}, + {"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS, + "Some%20random text %-OK"}, + {"Some%20random text %25%2dOK", + UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, + "Some random text %-OK"}, + {"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, "\xA0\xB1\xC2\xD3\xE4\xF5"}, + {"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, "\xAa\xBb\xCc\xDd\xEe\xFf"}, + // Certain URL-sensitive characters should not be unescaped unless asked. + {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES, + "Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"}, + {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", + UnescapeRule::URL_SPECIAL_CHARS, + "Hello%20%13%10world ## ?? == && %% ++"}, + // We can neither escape nor unescape '@' since some websites expect it to + // be preserved as either '@' or "%40". + // See http://b/996720 and http://crbug.com/23933 . + {"me@my%40example", UnescapeRule::NORMAL, "me@my%40example"}, + // Control characters. + {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS, + "%01%02%03%04%05%06%07%08%09 %"}, + {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS, + "\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"}, + {"Hello%20%13%10%02", UnescapeRule::SPACES, "Hello %13%10%02"}, + {"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS, "Hello%20\x13\x10\x02"}, + }; + + for (size_t i = 0; i < arraysize(unescape_cases); i++) { + std::string str(unescape_cases[i].input); + EXPECT_EQ(std::string(unescape_cases[i].output), + UnescapeURLComponent(str, unescape_cases[i].rules)); + } + + // Test the NULL character unescaping (which wouldn't work above since those + // are just char pointers). + std::string input("Null"); + input.push_back(0); // Also have a NULL in the input. + input.append("%00%39Test"); + + // When we're unescaping NULLs + std::string expected("Null"); + expected.push_back(0); + expected.push_back(0); + expected.append("9Test"); + EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS)); + + // When we're not unescaping NULLs. + expected = "Null"; + expected.push_back(0); + expected.append("%009Test"); + EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL)); +} + +TEST(EscapeTest, UnescapeURLComponent) { + const UnescapeURLCase unescape_cases[] = { + {L"", UnescapeRule::NORMAL, L""}, + {L"%2", UnescapeRule::NORMAL, L"%2"}, + {L"%%%%%%", UnescapeRule::NORMAL, L"%%%%%%"}, + {L"Don't escape anything", UnescapeRule::NORMAL, L"Don't escape anything"}, + {L"Invalid %escape %2", UnescapeRule::NORMAL, L"Invalid %escape %2"}, + {L"Some%20random text %25%2dOK", UnescapeRule::NONE, + L"Some%20random text %25%2dOK"}, + {L"Some%20random text %25%2dOK", UnescapeRule::NORMAL, + L"Some%20random text %25-OK"}, + {L"Some%20random text %25%2dOK", UnescapeRule::SPACES, + L"Some random text %25-OK"}, + {L"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS, + L"Some%20random text %-OK"}, + {L"Some%20random text %25%2dOK", + UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, + L"Some random text %-OK"}, + {L"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, L"\xA0\xB1\xC2\xD3\xE4\xF5"}, + {L"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, L"\xAa\xBb\xCc\xDd\xEe\xFf"}, + // Certain URL-sensitive characters should not be unescaped unless asked. + {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES, + L"Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"}, + {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", + UnescapeRule::URL_SPECIAL_CHARS, + L"Hello%20%13%10world ## ?? == && %% ++"}, + // We can neither escape nor unescape '@' since some websites expect it to + // be preserved as either '@' or "%40". + // See http://b/996720 and http://crbug.com/23933 . + {L"me@my%40example", UnescapeRule::NORMAL, L"me@my%40example"}, + // Control characters. + {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS, + L"%01%02%03%04%05%06%07%08%09 %"}, + {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS, + L"\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"}, + {L"Hello%20%13%10%02", UnescapeRule::SPACES, L"Hello %13%10%02"}, + {L"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS, + L"Hello%20\x13\x10\x02"}, + {L"Hello\x9824\x9827", UnescapeRule::CONTROL_CHARS, + L"Hello\x9824\x9827"}, + }; + + for (size_t i = 0; i < arraysize(unescape_cases); i++) { + base::string16 str(WideToUTF16(unescape_cases[i].input)); + EXPECT_EQ(WideToUTF16(unescape_cases[i].output), + UnescapeURLComponent(str, unescape_cases[i].rules)); + } + + // Test the NULL character unescaping (which wouldn't work above since those + // are just char pointers). + base::string16 input(WideToUTF16(L"Null")); + input.push_back(0); // Also have a NULL in the input. + input.append(WideToUTF16(L"%00%39Test")); + + // When we're unescaping NULLs + base::string16 expected(WideToUTF16(L"Null")); + expected.push_back(0); + expected.push_back(0); + expected.append(ASCIIToUTF16("9Test")); + EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS)); + + // When we're not unescaping NULLs. + expected = WideToUTF16(L"Null"); + expected.push_back(0); + expected.append(WideToUTF16(L"%009Test")); + EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL)); +} + +TEST(EscapeTest, UnescapeAndDecodeUTF8URLComponent) { + const UnescapeAndDecodeCase unescape_cases[] = { + { "%", + "%", + "%", + L"%"}, + { "+", + "+", + " ", + L"+"}, + { "%2+", + "%2+", + "%2 ", + L"%2+"}, + { "+%%%+%%%", + "+%%%+%%%", + " %%% %%%", + L"+%%%+%%%"}, + { "Don't escape anything", + "Don't escape anything", + "Don't escape anything", + L"Don't escape anything"}, + { "+Invalid %escape %2+", + "+Invalid %escape %2+", + " Invalid %escape %2 ", + L"+Invalid %escape %2+"}, + { "Some random text %25%2dOK", + "Some random text %25-OK", + "Some random text %25-OK", + L"Some random text %25-OK"}, + { "%01%02%03%04%05%06%07%08%09", + "%01%02%03%04%05%06%07%08%09", + "%01%02%03%04%05%06%07%08%09", + L"%01%02%03%04%05%06%07%08%09"}, + { "%E4%BD%A0+%E5%A5%BD", + "\xE4\xBD\xA0+\xE5\xA5\xBD", + "\xE4\xBD\xA0 \xE5\xA5\xBD", + L"\x4f60+\x597d"}, + { "%ED%ED", // Invalid UTF-8. + "\xED\xED", + "\xED\xED", + L"%ED%ED"}, // Invalid UTF-8 -> kept unescaped. + }; + + for (size_t i = 0; i < arraysize(unescape_cases); i++) { + std::string unescaped = UnescapeURLComponent(unescape_cases[i].input, + UnescapeRule::NORMAL); + EXPECT_EQ(std::string(unescape_cases[i].url_unescaped), unescaped); + + unescaped = UnescapeURLComponent(unescape_cases[i].input, + UnescapeRule::REPLACE_PLUS_WITH_SPACE); + EXPECT_EQ(std::string(unescape_cases[i].query_unescaped), unescaped); + + // TODO: Need to test unescape_spaces and unescape_percent. + base::string16 decoded = UnescapeAndDecodeUTF8URLComponent( + unescape_cases[i].input, UnescapeRule::NORMAL, NULL); + EXPECT_EQ(WideToUTF16(unescape_cases[i].decoded), decoded); + } +} + +TEST(EscapeTest, AdjustOffset) { + const AdjustOffsetCase adjust_cases[] = { + {"", 0, std::string::npos}, + {"test", 0, 0}, + {"test", 2, 2}, + {"test", 4, std::string::npos}, + {"test", std::string::npos, std::string::npos}, + {"%2dtest", 6, 4}, + {"%2dtest", 2, std::string::npos}, + {"test%2d", 2, 2}, + {"%E4%BD%A0+%E5%A5%BD", 9, 1}, + {"%E4%BD%A0+%E5%A5%BD", 6, std::string::npos}, + {"%ED%B0%80+%E5%A5%BD", 6, 6}, + }; + + for (size_t i = 0; i < arraysize(adjust_cases); i++) { + size_t offset = adjust_cases[i].input_offset; + UnescapeAndDecodeUTF8URLComponent(adjust_cases[i].input, + UnescapeRule::NORMAL, &offset); + EXPECT_EQ(adjust_cases[i].output_offset, offset); + } +} + +TEST(EscapeTest, EscapeForHTML) { + const EscapeForHTMLCase tests[] = { + { "hello", "hello" }, + { "<hello>", "<hello>" }, + { "don\'t mess with me", "don't mess with me" }, + }; + for (size_t i = 0; i < arraysize(tests); ++i) { + std::string result = EscapeForHTML(std::string(tests[i].input)); + EXPECT_EQ(std::string(tests[i].expected_output), result); + } +} + +TEST(EscapeTest, UnescapeForHTML) { + const EscapeForHTMLCase tests[] = { + { "", "" }, + { "<hello>", "<hello>" }, + { "don't mess with me", "don\'t mess with me" }, + { "<>&"'", "<>&\"'" }, + { "& lt; & ; &; '", "& lt; & ; &; '" }, + { "&", "&" }, + { """, "\"" }, + { "'", "'" }, + { "<", "<" }, + { ">", ">" }, + { "& &", "& &" }, + }; + for (size_t i = 0; i < arraysize(tests); ++i) { + base::string16 result = UnescapeForHTML(ASCIIToUTF16(tests[i].input)); + EXPECT_EQ(ASCIIToUTF16(tests[i].expected_output), result); + } +} + +TEST(EscapeTest, AdjustEncodingOffset) { + // Imagine we have strings as shown in the following cases where the + // %XX's represent encoded characters + + // 1: abc%ECdef ==> abcXdef + std::vector<size_t> offsets; + for (size_t t = 0; t < 9; ++t) + offsets.push_back(t); + internal::AdjustEncodingOffset::Adjustments adjustments; + adjustments.push_back(3); + std::for_each(offsets.begin(), offsets.end(), + internal::AdjustEncodingOffset(adjustments)); + size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6}; + EXPECT_EQ(offsets.size(), arraysize(expected_1)); + for (size_t i = 0; i < arraysize(expected_1); ++i) + EXPECT_EQ(expected_1[i], offsets[i]); + + + // 2: %ECabc%EC%ECdef%EC ==> XabcXXdefX + offsets.clear(); + for (size_t t = 0; t < 18; ++t) + offsets.push_back(t); + adjustments.clear(); + adjustments.push_back(0); + adjustments.push_back(6); + adjustments.push_back(9); + adjustments.push_back(15); + std::for_each(offsets.begin(), offsets.end(), + internal::AdjustEncodingOffset(adjustments)); + size_t expected_2[] = {0, kNpos, kNpos, 1, 2, 3, 4, kNpos, kNpos, 5, kNpos, + kNpos, 6, 7, 8, 9, kNpos, kNpos}; + EXPECT_EQ(offsets.size(), arraysize(expected_2)); + for (size_t i = 0; i < arraysize(expected_2); ++i) + EXPECT_EQ(expected_2[i], offsets[i]); +} + +} // namespace +} // namespace net |
