/****************************************************************************
 * Twitch SDK
 *
 * This software is supplied under the terms of a license agreement with
 * Twitch Interactive, Inc. and may not be copied or used except in accordance
 * with the terms of that agreement
 *
 * Copyright (c) 2012-2016 Twitch Interactive, Inc.
 ***************************************************************************/

#include "fixtures/sdkbasetest.h"
#include "twitchsdk/core/utf8.h"

#include "gtest/gtest.h"

using namespace ttv::test;

// Table-driven check of ttv::IsValidUtf8.
//
// Three tables are built:
//   - valid_notfour: strings expected to validate with the four-byte flag false
//   - valid_four:    strings expected to validate with the four-byte flag true
//   - invalid:       strings expected to be rejected
//
// Every entry is checked individually, then the valid entries are concatenated
// and checked again as one long string. On failure the offending input is
// reported as a hex dump, since most inputs are not printable text.
TEST_F(SdkBaseTest, ValidateUtf8) {
  std::vector<std::string> valid_notfour;
  std::vector<std::string> valid_four;
  std::vector<std::string> invalid;

  // Renders each byte as \xNN so that a failing input can be identified in the
  // test log even when it is not valid text.
  const auto toHex = [](const std::string& bytes) -> std::string {
    static const char kHexDigits[] = "0123456789abcdef";
    std::string rendered;
    rendered.reserve(bytes.size() * 4);
    for (const char ch : bytes) {
      const unsigned char value = static_cast<unsigned char>(ch);
      rendered += "\\x";
      rendered += kHexDigits[value >> 4];
      rendered += kHexDigits[value & 0x0f];
    }
    return rendered;
  };

  valid_notfour.push_back("");  // Empty string
  valid_notfour.push_back(
    "TEST_F(SdkBaseTest, ValidateUtf8)\n{\n\tstd::vector<std::string> valid_notfour;\n\tstd::vector<std::string> valid_four;\n\tstd::vector<std::string> invalid;");
  valid_notfour.push_back("help me come up with example test strings thanks");
  valid_notfour.push_back(" \n \r \t \\ ");
  // Printable ASCII set
  valid_notfour.push_back(
    " !\"#$%&\\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~");
  valid_notfour.push_back("\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5");       // Mix of two- and three-byte sequences
  valid_notfour.push_back("\xe2\x99\x94\xe2\x99\x95\xe2\x99\x99\xe2\x99\x9f");  // Random chess pieces
  valid_four.push_back("\xf0\x9f\x98\x80\xf0\x9f\x98\xb1\xf0\x9f\x99\x8c");     // Random emoticons

  // Single-Byte scenarios
  valid_notfour.push_back("\x01");
  valid_notfour.push_back("\x65");

  // First possible sequences of certain length
  valid_notfour.push_back("\xc2\x80");
  valid_notfour.push_back("\xe0\xa0\x80");
  valid_four.push_back("\xf0\x90\x80\x80");

  // Last possible sequences of certain length
  valid_notfour.push_back("\x7f");
  valid_notfour.push_back("\xdf\xbf");
  valid_notfour.push_back("\xef\xbf\xbf");

  // Codepoint cap is U-0010FFFF
  valid_four.push_back("\xf4\x8f\xbf\xbf");

  // Valid boundary sequences
  valid_notfour.push_back("\xc2\xbf");
  valid_notfour.push_back("\xe0\xb2\xa0");
  valid_notfour.push_back("\xe1\x80\x80");
  valid_notfour.push_back("\xec\xbf\xbf");
  valid_notfour.push_back("\xed\x80\xa0");
  valid_notfour.push_back("\xed\x9f\xbf");
  valid_notfour.push_back("\xee\x80\x80");
  valid_notfour.push_back("\xef\xbf\xbf");
  valid_four.push_back("\xf0\x90\x80\x80");
  valid_four.push_back("\xf0\xbf\xbf\xbf");
  valid_four.push_back("\xf1\x80\x80\x80");
  valid_four.push_back("\xf1\xbf\xbf\xbf");
  valid_four.push_back("\xf3\xbf\xbf\xbf");

  // Valid sequences - based on code coverage
  valid_notfour.push_back("\xd6\x80");
  valid_notfour.push_back("\xc2\x9d");
  valid_notfour.push_back("\xe0\xaf\x80");
  valid_notfour.push_back("\xed\x8c\x80");
  valid_four.push_back("\xf3\x80\x80\x80");
  valid_four.push_back("\xf0\x90\x80\x80");
  valid_four.push_back("twitch\xf0\xaf\x80\x80");
  valid_four.push_back("hello\xf4\x8e\x80\x80");
  valid_four.push_back("abc\xf3\x92\xa5\xbe");

  // Invalid sequences - based on code coverage
  invalid.push_back("\xc2\x4f");
  invalid.push_back("\xcf\x5f");
  invalid.push_back("\xf5\x80\x80\x80");
  invalid.push_back("\xf6\x80\x80\x80");
  invalid.push_back("\xf7\x80\x80\x80");
  invalid.push_back("\xf0\x8d\x80\x80");
  invalid.push_back("\xf4\x9d\x80\x80");
  invalid.push_back("\xc0\x7f");
  invalid.push_back("\xc2\x7f");
  invalid.push_back("\xf3\x80\x80\x7f");
  invalid.push_back("\xe0\xaf\x7f");
  invalid.push_back("\xed\x8c\x7f");
  invalid.push_back("\xf0\x90\x80\x7f");
  invalid.push_back("\xf0\xaf\x80\x7f");
  invalid.push_back("\xf4\x8e\x80\x7f");
  invalid.push_back("\xf3\x92\xa5\x7f");

  // Invalid boundary sequences
  invalid.push_back("\xc0\x80");
  invalid.push_back("\xc1\xbf");
  invalid.push_back("\xe0\x80\x80");
  invalid.push_back("\xe0\x9f\xbf");
  invalid.push_back("\xed\xa0\x80");
  invalid.push_back("\xf0\x80\x80\x80");
  invalid.push_back("\xf0\x8f\xbf\xbf");
  invalid.push_back("\xf4\x90\x80\x80");
  invalid.push_back("\xf5\x80\x80\x80");
  invalid.push_back("\xf7\x80\x80\x80");
  invalid.push_back("\xff\x80\x80\x80");
  invalid.push_back("\xff\xbf\xbf\xbf");

  // Invalid - starting with continuation code unit
  invalid.push_back("\x80");
  invalid.push_back("\xbf");
  invalid.push_back("\x80\xba");
  invalid.push_back("\x87\x8d");
  invalid.push_back("\x9d\xa0");
  invalid.push_back("\xbf\xbf");
  invalid.push_back("\x80\xa0\x9c");
  invalid.push_back("\x80\xa0\xa3\x8e");

  // Invalid - start characters with no continuation code units
  invalid.push_back("\xc2 ");
  invalid.push_back("\xc3 ");
  invalid.push_back("\xc4 ");
  invalid.push_back("\xc5 ");
  invalid.push_back("\xc6 ");
  invalid.push_back("\xc7 ");
  invalid.push_back("\xc8 ");
  invalid.push_back("\xc9 ");
  invalid.push_back("\xca ");
  invalid.push_back("\xcb ");
  invalid.push_back("\xcc ");
  invalid.push_back("\xcd ");
  invalid.push_back("\xce ");
  invalid.push_back("\xcf ");
  invalid.push_back("\xd0 ");
  invalid.push_back("\xd1 ");
  invalid.push_back("\xd2 ");
  invalid.push_back("\xd3 ");
  invalid.push_back("\xd4 ");
  invalid.push_back("\xd5 ");
  invalid.push_back("\xd6 ");
  invalid.push_back("\xd7 ");
  invalid.push_back("\xd8 ");
  invalid.push_back("\xd9 ");
  invalid.push_back("\xda ");
  invalid.push_back("\xdb ");
  invalid.push_back("\xdc ");
  invalid.push_back("\xdd ");
  invalid.push_back("\xde ");
  invalid.push_back("\xdf ");
  invalid.push_back("\xe0 ");
  invalid.push_back("\xe1 ");
  invalid.push_back("\xe2 ");
  invalid.push_back("\xe3 ");
  invalid.push_back("\xe4 ");
  invalid.push_back("\xe5 ");
  invalid.push_back("\xe6 ");
  invalid.push_back("\xe7 ");
  invalid.push_back("\xe8 ");
  invalid.push_back("\xe9 ");
  invalid.push_back("\xea ");
  invalid.push_back("\xeb ");
  invalid.push_back("\xec ");
  invalid.push_back("\xed ");
  invalid.push_back("\xee ");
  invalid.push_back("\xef ");
  invalid.push_back("\xf0 ");
  invalid.push_back("\xf1 ");
  invalid.push_back("\xf2 ");
  invalid.push_back("\xf3 ");
  invalid.push_back("\xf4 ");

  // Invalid - sequences with bytes missing
  invalid.push_back("\xc2");
  invalid.push_back("\xe0\xb2");
  invalid.push_back("\xe1\x80");
  invalid.push_back("\xec\xbf");
  invalid.push_back("\xed\x80");
  invalid.push_back("\xed\x9f");
  invalid.push_back("\xee\x80");
  invalid.push_back("\xef\xbf");
  invalid.push_back("\xf0\x90");
  invalid.push_back("\xf0\xbf");
  invalid.push_back("\xf1\x80");
  invalid.push_back("\xf1\xbf");
  invalid.push_back("\xf3\xbf");
  invalid.push_back("\xf0\x90\x80");
  invalid.push_back("\xf0\xbf\xbf");
  invalid.push_back("\xf1\x80\x80");
  invalid.push_back("\xf1\xbf\xbf");
  invalid.push_back("\xf3\xbf\xbf");

  // Invalid - 0xfe and 0xff never appear in a UTF-8 string
  invalid.push_back("\xfe");
  invalid.push_back("\xff");
  invalid.push_back("\xfe\xff");

  // Invalid - overlong sequences
  invalid.push_back("\xc0\xaf");
  invalid.push_back("\xe0\x80\xaf");
  invalid.push_back("\xf0\x80\x80\xaf");

  // Invalid - too many connecting code units
  invalid.push_back("\x01\x80");
  invalid.push_back("\x65\x80");
  invalid.push_back("\xc2\xbf\x80");
  invalid.push_back("\xe0\xb2\xa0\x80");
  invalid.push_back("\xe1\x80\x80\x80");
  invalid.push_back("\xec\xbf\xbf\x80");
  invalid.push_back("\xed\x80\xa0\x80");
  invalid.push_back("\xed\x9f\xbf\x80");
  invalid.push_back("\xee\x80\x80\x80");
  invalid.push_back("\xef\xbf\xbf\x80");
  invalid.push_back("\xf0\x90\x80\x80\x80");
  invalid.push_back("\xf0\xbf\xbf\xbf\x80");
  invalid.push_back("\xf1\x80\x80\x80\x80");
  invalid.push_back("\xf1\xbf\xbf\xbf\x80");
  invalid.push_back("\xf3\xbf\xbf\xbf\x80");

  // Invalid - Single UTF-16 surrogates
  invalid.push_back("\xed\xa0\x80");
  invalid.push_back("\xed\xad\xbf");
  invalid.push_back("\xed\xae\x80");
  invalid.push_back("\xed\xaf\xbf");
  invalid.push_back("\xed\xb0\x80");
  invalid.push_back("\xed\xbe\x80");
  invalid.push_back("\xed\xbf\xbf");

  // Invalid - Paired UTF-16 surrogates
  invalid.push_back("\xed\xa0\x80\xed\xb0\x80");
  invalid.push_back("\xed\xa0\x80\xed\xbf\xbf");
  invalid.push_back("\xed\xad\xbf\xed\xb0\x80");
  invalid.push_back("\xed\xad\xbf\xed\xbf\xbf");
  invalid.push_back("\xed\xae\x80\xed\xb0\x80");
  invalid.push_back("\xed\xae\x80\xed\xbf\xbf");
  invalid.push_back("\xed\xaf\xbf\xed\xb0\x80");
  invalid.push_back("\xed\xaf\xbf\xed\xbf\xbf");

  // Concatenations of every valid string, and of the valid strings without
  // four-byte sequences, filled in while the individual checks run.
  std::string allStringsCombined;
  std::string allStringsNotFourCombined;

  // The flag is shared by every call below and is never reset by the test.
  bool hasFourByteChars = false;
  for (const auto& str : invalid) {
    ASSERT_FALSE(ttv::IsValidUtf8(str.c_str(), hasFourByteChars))
      << "invalid sequence was accepted: " << toHex(str);
  }

  for (const auto& str : valid_notfour) {
    ASSERT_TRUE(ttv::IsValidUtf8(str.c_str(), hasFourByteChars))
      << "valid sequence was rejected: " << toHex(str);
    ASSERT_FALSE(hasFourByteChars) << "four-byte flag unexpectedly set for: " << toHex(str);

    allStringsCombined += str;
    allStringsNotFourCombined += str;
  }

  for (const auto& str : valid_four) {
    ASSERT_TRUE(ttv::IsValidUtf8(str.c_str(), hasFourByteChars))
      << "valid four-byte sequence was rejected: " << toHex(str);
    ASSERT_TRUE(hasFourByteChars) << "four-byte flag not set for: " << toHex(str);

    allStringsCombined += str;
  }

  ASSERT_TRUE(ttv::IsValidUtf8(allStringsCombined.c_str(), hasFourByteChars))
    << "concatenation of all valid strings was rejected";
  ASSERT_TRUE(hasFourByteChars) << "four-byte flag not set for the concatenation of all valid strings";

  // The flag is true at this point, so the next check also requires the call
  // to set it back to false.
  ASSERT_TRUE(ttv::IsValidUtf8(allStringsNotFourCombined.c_str(), hasFourByteChars))
    << "concatenation of valid non-four-byte strings was rejected";
  ASSERT_FALSE(hasFourByteChars)
    << "four-byte flag still set for the concatenation of valid non-four-byte strings";
}
