/****************************************************************************
 * Twitch SDK
 *
 * This software is supplied under the terms of a license agreement with
 * Twitch Interactive, Inc. and may not be copied or used except in accordance
 * with the terms of that agreement
 *
 * Copyright (c) 2012-2016 Twitch Interactive, Inc.
 ***************************************************************************/

#include "twitchsdk/chat/internal/pch.h"

#include "twitchsdk/chat/internal/chatmessageparsing.h"

#include "twitchsdk/chat/internal/chathelpers.h"
#include "twitchsdk/chat/internal/chatuserbadges.h"
#include "twitchsdk/chat/internal/ircstring.h"
#include "twitchsdk/chat/internal/useremoticonsets.h"
#include "twitchsdk/chat/tokenranges.h"
#include "twitchsdk/core/stringutilities.h"
#include "twitchsdk/core/utf8.h"

#include <functional>
#include <sstream>

namespace ttv {
namespace chat {

namespace {

/**
 * Skips leading whitespace and returns a pointer to the first non-whitespace character.
 * If str already points at a non-whitespace character it is returned unchanged.
 * Returns nullptr when str is null, empty, or only whitespace remains before the terminator.
 */
const char* AdvanceToNextWord(const char* str) {
  if (str == nullptr) {
    return nullptr;
  }

  for (const char* cursor = str; *cursor != '\0'; ++cursor) {
    if (!ttv::IsWhitespace(*cursor)) {
      return cursor;
    }
  }

  // Reached the terminator without finding the start of a word
  return nullptr;
}

/**
 * Returns a pointer to the final character of the word that str points into, i.e. the last
 * character before the next whitespace character or the terminator.
 * If str points at whitespace it is returned unchanged.
 * Returns nullptr when str is null or empty.
 */
const char* AdvanceToEndOfWord(const char* str) {
  if (str == nullptr || *str == '\0') {
    return nullptr;
  }

  // Not inside a word, nothing to advance over
  if (ttv::IsWhitespace(*str)) {
    return str;
  }

  // Step forward for as long as the following character still belongs to the word
  const char* last = str;
  while (last[1] != '\0' && !ttv::IsWhitespace(last[1])) {
    ++last;
  }

  return last;
}

/**
 * Returns a pointer to the last character in a word / potential mention token.
 *
 * Two kinds of runs are recognized, chosen by the first character:
 *  - '@', simple word characters and UTF8 bytes: the run continues over simple word characters and
 *    UTF8 bytes and stops at anything else.
 *  - any other (special) character: the run continues until whitespace, a simple word character or '@'.
 *
 * Returns the same pointer if str is on whitespace, and nullptr if str is null or empty.
 */
const char* AdvanceToEndOfMention(const char* str) {
  if (str == nullptr || *str == '\0') {
    return nullptr;
  }

  if (ttv::IsWhitespace(*str)) {
    return str;
  }

  const char* next = str + 1;

  // Alphanumeric characters, UTF8, and @ mentions end at special characters or whitespace
  if (*str == '@' || ttv::chat::IsSimpleWordCharacter(*str) || ttv::IsUtf8StartByte(*str)) {
    // The explicit terminator check keeps the scan inside the string regardless of how the
    // character classification helpers treat '\0'.
    while (*next != '\0' && (ttv::chat::IsSimpleWordCharacter(*next) || ttv::IsUtf8StartByte(*next))) {
      str = next;
      next++;
    }
  }
  // Special characters end at alphanumeric characters or @
  else {
    while (*next != '\0' && !ttv::IsWhitespace(*next) && !ttv::chat::IsSimpleWordCharacter(*next) && *next != '@') {
      str = next;
      next++;
    }
  }

  return str;
}

/**
 * Appends text tokens covering message[textStart, textEnd) to result, splitting the text wherever
 * an AutoMod-flagged range lies inside it so that the flagged portion gets its own token carrying
 * the range's flags.
 *
 * flagsIndex is a cursor into autoModFlagsRanges that is shared between successive calls; it is
 * advanced past every range that is skipped or consumed here. Flag range end indices are
 * inclusive while textEnd is exclusive.
 */
void InsertTextTokens(const std::string& message, size_t textStart, size_t textEnd,
  const std::vector<ttv::chat::AutoModFlagsRange>& autoModFlagsRanges, size_t& flagsIndex,
  std::vector<std::unique_ptr<ttv::chat::MessageToken>>& result) {
  const size_t flagsCount = autoModFlagsRanges.size();

  // Drop flagged ranges that begin before this stretch of text
  for (; flagsIndex < flagsCount; ++flagsIndex) {
    if (!(autoModFlagsRanges[flagsIndex].startIndex < textStart)) {
      break;
    }
  }

  // Emit every flagged range that ends inside this stretch of text, preceded by any plain text before it
  for (; flagsIndex < flagsCount; ++flagsIndex) {
    const auto& flagged = autoModFlagsRanges[flagsIndex];
    if (!(textEnd > flagged.endIndex)) {
      break;
    }

    if (flagged.startIndex > textStart) {
      std::string plainText = message.substr(textStart, flagged.startIndex - textStart);
      result.emplace_back(std::make_unique<ttv::chat::TextToken>(plainText));
    }

    std::string flaggedText = message.substr(flagged.startIndex, flagged.endIndex - flagged.startIndex + 1);
    result.emplace_back(std::make_unique<ttv::chat::TextToken>(flaggedText, flagged.flags));

    // Continue right after the flagged section
    textStart = flagged.endIndex + 1;
  }

  // Whatever is left over is unflagged text
  if (textStart < textEnd) {
    std::string trailingText = message.substr(textStart, textEnd - textStart);
    result.emplace_back(std::make_unique<ttv::chat::TextToken>(trailingText));
  }
}
}  // namespace
}  // namespace chat
}  // namespace ttv

bool ttv::chat::ParseEmotesMessageTag(
  const std::string& emotesMessageTag, std::map<std::string, std::vector<EmoteRange>>& result) {
  // Extract the emoticon ids in use
  std::vector<std::string> tokenRanges;
  Split(emotesMessageTag, tokenRanges, '/', false);

  for (auto& ranges : tokenRanges) {
    // Extract the emoticon id
    size_t colonIndex = ranges.find(":");

    // Invalid format
    if (colonIndex == std::string::npos || colonIndex == 0 || colonIndex == ranges.size() - 1) {
      continue;
    }

    std::string emoticonId = ranges.substr(0, colonIndex);

    // Invalid emoticon id
    if (emoticonId.empty()) {
      continue;
    }

    auto& emoteList = result[emoticonId];

    // Extract the ranges for the emoticon id
    ranges = ranges.substr(colonIndex + 1);

    std::vector<std::string> tokens;
    Split(ranges, tokens, ',', false);

    for (auto token : tokens) {
      std::vector<std::string> indices;
      Split(token, indices, '-', false);

      if (indices.size() == 2) {
        EmoteRange range;

        bool numFilled = ParseNum(indices[0], range.startIndex);
        if (!numFilled) {
          continue;
        }

        numFilled = ParseNum(indices[1], range.endIndex);
        if (!numFilled) {
          continue;
        }

        if (range.startIndex > range.endIndex) {
          continue;
        }

        emoteList.emplace_back(range);
      }
    }
  }

  return true;
}

/**
 * Parses a badges message tag of the form
 *    {badge_set_id}/{badge_set_version},{badge_set_id}/{badge_set_version}...
 * into (badge set id, badge set version) pairs.
 *
 * Entries that do not split into exactly two parts around '/' are ignored.
 *
 * @param messageTag The raw tag value.
 * @param result Receives the parsed pairs. Existing content is kept and appended to.
 * @return Always true.
 */
bool ttv::chat::ParseBadgesMessageTag(
  const std::string& messageTag, std::vector<std::pair<std::string, std::string>>& result) {
  std::vector<std::string> badgeEntries;
  Split(messageTag, badgeEntries, ',', false);

  // Scratch buffer reused across entries; it is emptied before every split
  std::vector<std::string> badgeParts;
  for (const auto& badgeEntry : badgeEntries) {
    badgeParts.clear();
    Split(badgeEntry, badgeParts, '/', false);

    if (badgeParts.size() == 2) {
      result.emplace_back(badgeParts[0], badgeParts[1]);
    }
  }

  return true;
}

/**
 * Parses an AutoMod flags message tag of the form
 *    {start_index}-{end_index}:{category}.{severity}/{category}.{severity},...
 * into a list of flagged ranges.
 *
 * Recognized category letters are 'A' (aggressive), 'I' (identity), 'P' (profanity) and 'S' (sexual);
 * unknown categories and malformed severity entries are ignored. A range entry is dropped entirely
 * when its index pair is missing, not numeric, appears after the colon, or is inverted
 * (start index greater than end index).
 *
 * @param messageTag The raw tag value.
 * @param result Receives the parsed ranges. Existing content is kept and appended to.
 * @return Always true.
 */
bool ttv::chat::ParseAutoModFlagsMessageTag(const std::string& messageTag, std::vector<AutoModFlagsRange>& result) {
  std::vector<std::string> rangesInfo;
  Split(messageTag, rangesInfo, ',', false);

  for (const auto& rangeInfo : rangesInfo) {
    const size_t dashIndex = rangeInfo.find('-');
    const size_t colonIndex = rangeInfo.find(':');

    // Both separators are required and the index pair must come before the colon; otherwise the
    // length computed for the end index below would wrap around.
    if (dashIndex == std::string::npos || colonIndex == std::string::npos || dashIndex > colonIndex) {
      continue;
    }

    std::vector<std::string> severityLevels;
    Split(rangeInfo.substr(colonIndex + 1), severityLevels, '/', false);

    if (severityLevels.empty()) {
      continue;
    }

    AutoModFlagsRange autoModFlagsRange;
    if (!ParseNum(rangeInfo.substr(0, dashIndex), autoModFlagsRange.startIndex) ||
        !ParseNum(rangeInfo.substr(dashIndex + 1, colonIndex - dashIndex - 1), autoModFlagsRange.endIndex)) {
      continue;
    }

    // Reject inverted ranges, mirroring the validation done for emote ranges. Consumers compute
    // endIndex - startIndex + 1 as a length and would misbehave on such input.
    if (autoModFlagsRange.startIndex > autoModFlagsRange.endIndex) {
      continue;
    }

    for (const auto& severityLevelStr : severityLevels) {
      // Expect a single category letter, a '.', and at least one severity digit
      if (severityLevelStr.size() < 3 || severityLevelStr[1] != '.') {
        continue;
      }

      uint32_t severityLevel = 0;
      if (!ParseNum(severityLevelStr.substr(2), severityLevel)) {
        continue;
      }

      switch (severityLevelStr[0]) {
        case 'A':
          autoModFlagsRange.flags.aggressiveLevel = severityLevel;
          break;
        case 'I':
          autoModFlagsRange.flags.identityLevel = severityLevel;
          break;
        case 'P':
          autoModFlagsRange.flags.profanityLevel = severityLevel;
          break;
        case 'S':
          autoModFlagsRange.flags.sexualLevel = severityLevel;
          break;
        default:
          break;
      }
    }

    result.emplace_back(std::move(autoModFlagsRange));
  }

  return true;
}

/**
 * Serializes (badge set id, badge set version) pairs into a badges message tag of the form
 *    {badge_set_id}/{badge_set_version},{badge_set_id}/{badge_set_version}...
 *
 * @param badges The badge pairs to serialize, in output order.
 * @param result Receives the generated tag. Left untouched when badges is empty.
 * @return false if badges is empty, true otherwise.
 */
bool ttv::chat::GenerateBadgesMessageTag(
  const std::vector<std::pair<std::string, std::string>>& badges, std::string& result) {
  if (badges.empty()) {
    return false;
  }

  // The first pair is written without a leading separator, every following pair is preceded by ','
  auto badgeIter = badges.begin();

  std::string messageTag;
  messageTag += badgeIter->first;
  messageTag += '/';
  messageTag += badgeIter->second;

  for (++badgeIter; badgeIter != badges.end(); ++badgeIter) {
    messageTag += ',';
    messageTag += badgeIter->first;
    messageTag += '/';
    messageTag += badgeIter->second;
  }

  result = messageTag;
  return true;
}

bool ttv::chat::TokenizeLocalMessage(const std::shared_ptr<User>& user, ChannelId channelId, const std::string& message,
  std::string& emotesMessageTag, std::string& badgesMessageTag) {
  emotesMessageTag = "";
  badgesMessageTag = "";

  // Get the local user info
  std::shared_ptr<UserEmoticonSets> userEmoticonSets;
  std::shared_ptr<ChatUserBadges> userBadges;

  if (user != nullptr) {
    userEmoticonSets = user->GetComponentContainer()->GetComponent<UserEmoticonSets>();
    userBadges = user->GetComponentContainer()->GetComponent<ChatUserBadges>();
  } else {
    return false;
  }

  if (userEmoticonSets == nullptr) {
    return false;
  }

  if (userBadges != nullptr) {
    badgesMessageTag = userBadges->GetBadgesMessageTag(channelId);
  }

  // If configured to not parse emoticons then we're done.
  if (!(userEmoticonSets->GetTokenizationOptions().emoticons)) {
    return true;
  }

  std::vector<EmoticonSet> emoticonSets;
  TTV_ErrorCode ec = userEmoticonSets->GetUserEmoticonSets(emoticonSets);

  std::vector<TokenRange> ranges;

  const auto messageBegin = message.begin();
  const auto messageEnd = message.end();
  auto current = messageBegin;
  int rangeNum = 0;
  while (current != messageEnd) {
    // Move to first non-whitespace and non-utf8
    while (current != messageEnd && (IsUtf8StartByte(*current) || isspace(*current))) {
      ++current;
    }

    auto wordBegin = current;
    if (current == messageEnd) {
      break;
    }

    // Consume non-whitespace and non-utf8 characters
    while (current != messageEnd && !IsUtf8StartByte(*current) && !isspace(*current)) {
      ++current;
    }

    std::string word(wordBegin, current);

    if (TTV_FAILED(ec)) {
      return false;
    }
    for (auto emoticonSetIter = emoticonSets.begin(); emoticonSetIter != emoticonSets.end(); ++emoticonSetIter) {
      // Modified emotes will contain a modifier code in the token and id.  The modifier code should be captured to
      // allow for correct asset downloading.
      std::string modifierCode;
      auto emoticonIter = std::find_if(emoticonSetIter->emoticons.begin(), emoticonSetIter->emoticons.end(),
        [&word, &modifierCode](const Emoticon& emoticon) {
          if (emoticon.isRegex) {
            return std::regex_match(word, emoticon.regex);
          } else if (emoticon.match == word) {
            return true;
          }
          // Modified emotes will contain the root token with the modifier suffix.  The modifier will need capture to
          // allow a fully qualified token and id to be handled. "token" => token_hf"
          else if (word.find(emoticon.match) == 0) {
            for (const auto& emoteMod : emoticon.modifiers) {
              if (word == emoticon.match + "_" + emoteMod.code) {
                modifierCode = emoteMod.code;
                return true;
              }
            }
          }

          return false;
        });

      if (emoticonIter != emoticonSetIter->emoticons.end()) {
        TokenRange range;
        range.type = TOKEN_RANGE_TYPE_EMOTICON;
        range.emoticon.emoticonId =
          modifierCode.empty() ? emoticonIter->emoticonId : emoticonIter->emoticonId + "_" + modifierCode;
        range.startIndex = static_cast<int>(wordBegin - messageBegin);
        range.endIndex = static_cast<int>((current - messageBegin) - 1);
        range.rangeNum = rangeNum;
        ranges.push_back(range);

        rangeNum++;

        break;
      }
    }
  }

  if (!ranges.empty()) {
    // Stable sort the ranges by start index, preserving the order where duplicate start indices occur.
    // Ranges that were found sooner take precedence over ranges found later
    std::sort(ranges.begin(), ranges.end(), tokenranges::SortTokenRangesByStartIndex);

    // Now remove ranges that overlap, keeping the left-most range
    tokenranges::RemoveOverlappingRanges(ranges);

    // Now adjust the indices accounting for UTF8 encoding
    ranges = tokenranges::ConvertByteRangesToUtf8Ranges(ranges, message);

    // Now sort the ranges by emoticon id so we can group them in the final result
    std::sort(ranges.begin(), ranges.end(), tokenranges::SortEmoticonRangesByEmoticonId);

    // Stringify the result in the format
    //    emote=1:2-3,5-6/2:9-15

    std::stringstream stream;

    std::string currentId;
    for (TokenRange& range : ranges) {
      if (currentId == range.emoticon.emoticonId) {
        stream << "," << range.startIndex << "-" << range.endIndex;
      } else {
        if (!currentId.empty()) {
          stream << "/";
        }

        currentId = range.emoticon.emoticonId;

        stream << range.emoticon.emoticonId << ":" << range.startIndex << "-" << range.endIndex;
      }
    }

    emotesMessageTag = stream.str();
  }

  return true;
}

void ttv::chat::TokenizeServerMessage(const std::string& message, const TokenizationOptions& tokenizationOptions,
  const std::string& emotesMessageTag, const std::shared_ptr<BitsConfiguration>& bitsConfig,
  const std::vector<std::string>& localUserNames, MessageInfo& tokenizedMessage) {
  TokenizeServerMessage(
    message, tokenizationOptions, emotesMessageTag, "", bitsConfig, localUserNames, tokenizedMessage);
}

/**
 * Tokenizes a server message given the raw emotes and AutoMod flags message tags: both tags are
 * parsed first and the resulting tokens are written into tokenizedMessage.tokens.
 */
void ttv::chat::TokenizeServerMessage(const std::string& message, const TokenizationOptions& tokenizationOptions,
  const std::string& emotesMessageTag, const std::string& autoModFlagsMessageTag,
  const std::shared_ptr<BitsConfiguration>& bitsConfig, const std::vector<std::string>& localUserNames,
  MessageInfo& tokenizedMessage) {
  // Emoticon id -> ranges, as described by the emotes tag
  std::map<std::string, std::vector<EmoteRange>> parsedEmoteRanges;
  ParseEmotesMessageTag(emotesMessageTag, parsedEmoteRanges);

  // Flagged ranges, as described by the AutoMod flags tag
  std::vector<AutoModFlagsRange> parsedFlagRanges;
  ParseAutoModFlagsMessageTag(autoModFlagsMessageTag, parsedFlagRanges);

  TokenizeServerMessage(message, tokenizationOptions, parsedEmoteRanges, parsedFlagRanges, bitsConfig, localUserNames,
    tokenizedMessage.tokens);
}

void ttv::chat::TokenizeServerMessage(const std::string& message, const TokenizationOptions& tokenizationOptions,
  const std::map<std::string, std::vector<EmoteRange>>& emoticonRanges, const std::vector<std::string>& localUserNames,
  MessageInfo& tokenizedMessage) {
  TokenizeServerMessage(
    message, tokenizationOptions, emoticonRanges, {}, nullptr, localUserNames, tokenizedMessage.tokens);
}

void ttv::chat::TokenizeServerMessage(const std::string& message, const TokenizationOptions& tokenizationOptions,
  const std::map<std::string, std::vector<EmoteRange>>& emoticonRanges,
  const std::shared_ptr<BitsConfiguration>& bitsConfig, const std::vector<std::string>& localUserNames,
  MessageInfo& tokenizedMessage) {
  TokenizeServerMessage(
    message, tokenizationOptions, emoticonRanges, {}, bitsConfig, localUserNames, tokenizedMessage.tokens);
}

/**
 * Core tokenizer: splits a server message into emoticon, bits, url, mention and text tokens.
 *
 * Processing happens in three steps:
 *  1. Emoticon ranges (when enabled) are converted to byte ranges and seed the list of parsed ranges.
 *  2. When mentions, urls or bits are enabled, every whitespace-delimited word that does not start
 *     at an already known range is checked, in order, for a cheermote (bits), a url, and a mention.
 *  3. Overlapping ranges are removed and tokens are emitted in order, with the text between ranges
 *     turned into text tokens that are split along AutoMod-flagged ranges.
 *
 * @param message The raw message text.
 * @param tokenizationOptions Selects which token types are recognized.
 * @param emoticonRanges Emoticon id -> ranges of the message covered by that emoticon.
 * @param autoModFlagsRanges Ranges of the message flagged by AutoMod.
 * @param bitsConfig Source of the known cheermote prefixes; may be null, in which case no bits tokens are produced.
 * @param localUserNames Names that count as a mention of the local user even without a leading '@'.
 * @param tokens Receives the resulting tokens; previous content is replaced.
 */
void ttv::chat::TokenizeServerMessage(const std::string& message, const TokenizationOptions& tokenizationOptions,
  const std::map<std::string, std::vector<EmoteRange>>& emoticonRanges,
  const std::vector<AutoModFlagsRange>& autoModFlagsRanges, const std::shared_ptr<BitsConfiguration>& bitsConfig,
  const std::vector<std::string>& localUserNames, std::vector<std::unique_ptr<MessageToken>>& tokens) {
  // NOTE: parsedRanges is always sorted by increasing start index
  std::vector<TokenRange> parsedRanges;
  auto autoModFlagsRangesBytes = tokenranges::ConvertUtf8RangesToByteRanges(autoModFlagsRanges, message);

  if (!message.empty()) {
    if (tokenizationOptions.emoticons) {
      parsedRanges = tokenranges::ConvertUtf8RangesToByteRanges(
        tokenranges::ConvertToTokenRanges(emoticonRanges, message.length()), message);
    }

    if ((tokenizationOptions.mentions) || (tokenizationOptions.urls) || (tokenizationOptions.bits)) {
      // Check each unused word to see if they're a token
      const char* messageStart = message.c_str();
      const char* wordStart = messageStart;
      if (IsWhitespace(wordStart[0])) {
        wordStart = AdvanceToNextWord(wordStart);
      }
      const char* wordEnd = nullptr;

      // Position in parsedRanges at which a range for the current word would have to be inserted
      auto insertIter = parsedRanges.begin();

      while (wordStart != nullptr) {
        // wordEnd points at the last character of the word (inclusive)
        wordEnd = AdvanceToEndOfWord(wordStart);

        size_t wordLength = static_cast<size_t>(wordEnd - wordStart + 1);

        int wordStartIndex = static_cast<int>(wordStart - messageStart);
        int wordEndIndex = static_cast<int>(wordEnd - messageStart);

        // Determine if this word is already for an existing token
        bool skip = false;
        while (insertIter != parsedRanges.end()) {
          const auto& cur = *insertIter;

          if (cur.startIndex > wordStartIndex) {
            break;
          } else if (cur.startIndex == wordStartIndex) {
            skip = true;
            break;
          } else {
            ++insertIter;
          }
        }

        // We don't already have a token for this word
        bool keep = false;
        TokenRange range;

        if (!skip) {
          std::string word(wordStart, wordLength);

          if ((bitsConfig != nullptr) && tokenizationOptions.bits) {
            for (const auto& cheermote : bitsConfig->GetCheermotes()) {
              const size_t prefixLength = cheermote.prefix.size();

              // The word must be the (case-insensitive) prefix followed by at least one more character
              if (word.size() <= prefixLength ||
                  strncasecmp(word.c_str(), cheermote.prefix.c_str(), prefixLength) != 0) {
                continue;
              }

              // Everything after the prefix must be a positive decimal amount. The amount is
              // accumulated in 64 bits and rejected once it exceeds INT32_MAX so that arbitrarily
              // long digit strings in a message cannot overflow.
              uint64_t numBits = 0;
              bool valueIsNum = true;
              for (size_t i = prefixLength; i < word.size(); ++i) {
                // isdigit() has undefined behavior for negative values, so go through unsigned char
                const unsigned char c = static_cast<unsigned char>(word[i]);
                if (!isdigit(c)) {
                  valueIsNum = false;
                  break;
                }

                numBits = (numBits * 10) + static_cast<uint64_t>(c - '0');
                if (numBits > static_cast<uint64_t>(INT32_MAX)) {
                  valueIsNum = false;
                  break;
                }
              }

              if (valueIsNum && numBits > 0) {
                // Only commit the bits data once the word is known to be valid, and stop looking so
                // that another cheermote sharing the same prefix cannot overwrite the accepted match.
                range.data.bits.prefixLength = static_cast<uint32_t>(prefixLength);
                range.data.bits.numBits = static_cast<uint32_t>(numBits);
                range.type = TOKEN_RANGE_TYPE_BITS;
                keep = true;
                skip = true;
                break;
              }
            }
          }

          if (!skip) {
            if (tokenizationOptions.urls) {
              if (IsTwitchChatUrl(word)) {
                range.type = TOKEN_RANGE_TYPE_URL;
                keep = true;
                skip = true;
              }
            }
          }
        }

        // We have a new valid bits or url token, add it
        if (keep) {
          range.startIndex = wordStartIndex;
          range.endIndex = wordEndIndex;
          range.rangeNum = static_cast<int>(parsedRanges.size());

          insertIter = parsedRanges.insert(insertIter, range);
          ++insertIter;
        }

        // Did not find url or bits tokens, search within word for mentions
        if (!skip && tokenizationOptions.mentions) {
          const char* potentialMentionStart = wordStart;
          const char* potentialMentionEnd = wordEnd;

          // Certain characters are allowed before an @ mention.
          while (potentialMentionStart < potentialMentionEnd &&
                 (*potentialMentionStart == '\\' || *potentialMentionStart == '\'' || *potentialMentionStart == '"' ||
                   *potentialMentionStart == '*' || *potentialMentionStart == '(' || *potentialMentionStart == '[' ||
                   *potentialMentionStart == '{' || *potentialMentionStart == '<' || *potentialMentionStart == '/')) {
            potentialMentionStart++;
          }

          if (potentialMentionStart[0] == '@') {
            potentialMentionEnd = AdvanceToEndOfMention(potentialMentionStart);
            size_t mentionLength = static_cast<size_t>(potentialMentionEnd - potentialMentionStart + 1);

            // The '@' must be followed by at least two more characters
            if (mentionLength > 2) {
              range.type = TOKEN_RANGE_TYPE_MENTION;
              keep = true;
            }
          } else if (potentialMentionStart == wordStart) {
            // No '@' and no leading punctuation: the whole word may still be one of the local user's names
            for (const auto& name : localUserNames) {
              if (wordLength == name.size() && strncasecmp(potentialMentionStart, name.c_str(), name.size()) == 0) {
                range.type = TOKEN_RANGE_TYPE_MENTION;
                keep = true;
                break;
              }
            }
          }

          if (keep) {
            range.startIndex = static_cast<int>(potentialMentionStart - messageStart);
            range.endIndex = static_cast<int>(potentialMentionEnd - messageStart);
            range.rangeNum = static_cast<int>(parsedRanges.size());

            insertIter = parsedRanges.insert(insertIter, range);
            ++insertIter;
          }
        }

        // Prepare for the next word
        wordStart = AdvanceToNextWord(wordEnd + 1);
      }
    }
  }

  // Remove ranges that overlap
  tokenranges::RemoveOverlappingRanges(parsedRanges);

  // Create the tokens
  std::vector<std::unique_ptr<MessageToken>> result;

  // Sentinel "previous range" that ends just before the start of the message
  TokenRange current;
  current.type = TOKEN_RANGE_TYPE_EMOTICON;
  current.data = {};
  current.startIndex = -1;
  current.endIndex = -1;
  current.rangeNum = -1;

  size_t autoModFlagsRangesIndex = 0;

  for (auto& range : parsedRanges) {
    // Insert a text token before other type of token if needed
    if (current.endIndex < range.startIndex - 1) {
      InsertTextTokens(message, static_cast<size_t>(current.endIndex) + 1, static_cast<size_t>(range.startIndex),
        autoModFlagsRangesBytes, autoModFlagsRangesIndex, result);
    }

    // Range indices are inclusive
    size_t len = static_cast<size_t>(range.endIndex - range.startIndex + 1);

    // Insert the appropriate token
    switch (range.type) {
      case TOKEN_RANGE_TYPE_EMOTICON: {
        result.emplace_back(std::make_unique<EmoticonToken>(
          message.substr(static_cast<size_t>(range.startIndex), len), range.emoticon.emoticonId));

        break;
      }
      case TOKEN_RANGE_TYPE_MENTION: {
        std::string mentionText = message.substr(static_cast<size_t>(range.startIndex), len);
        std::string userName;

        if (mentionText[0] == '@') {
          // Cut off the "@" from the beginning of the mention text to get the username.
          userName = mentionText.substr(1, len - 1);
        } else {
          userName = mentionText;
        }

        bool isLocalUser = false;
        for (const auto& name : localUserNames) {
          if (userName.size() == name.size() && strncasecmp(userName.c_str(), name.c_str(), userName.size()) == 0) {
            isLocalUser = true;
            break;
          }
        }
        result.emplace_back(std::make_unique<MentionToken>(userName, mentionText, isLocalUser));

        break;
      }
      case TOKEN_RANGE_TYPE_URL: {
        result.emplace_back(
          std::make_unique<UrlToken>(message.substr(static_cast<size_t>(range.startIndex), len), false));

        break;
      }
      case TOKEN_RANGE_TYPE_BITS: {
        // The token text is the cheermote prefix only; the amount is carried separately
        result.emplace_back(std::make_unique<BitsToken>(
          message.substr(static_cast<size_t>(range.startIndex), range.data.bits.prefixLength),
          range.data.bits.numBits));

        break;
      }
    }

    current = range;
  }

  // Insert the remaining portion of the message as text
  if (current.endIndex < static_cast<int>(message.size()) - 1) {
    InsertTextTokens(message, static_cast<size_t>(current.endIndex) + 1, message.size(), autoModFlagsRangesBytes,
      autoModFlagsRangesIndex, result);
  }

  tokens = std::move(result);
}
