# Copyright 2016-present MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""An implementation of RFC4013 SASLprep."""
from __future__ import annotations

from typing import Any, Optional

try:
    import stringprep
except ImportError:
    HAVE_STRINGPREP = False

    def saslprep(
        data: Any,
        prohibit_unassigned_code_points: Optional[bool] = True,  # noqa: ARG001
    ) -> Any:
        """SASLprep dummy"""
        if isinstance(data, str):
            raise TypeError(
                "The stringprep module is not available. Usernames and "
                "passwords must be instances of bytes."
            )
        return data

else:
    HAVE_STRINGPREP = True
    import unicodedata

    # RFC4013 section 2.3 prohibited output.
    _PROHIBITED = (
        # A strict reading of RFC 4013 requires table c12 here, but
        # characters from it are mapped to SPACE in the Map step. Can
        # normalization reintroduce them somehow?
        stringprep.in_table_c12,
        stringprep.in_table_c21_c22,
        stringprep.in_table_c3,
        stringprep.in_table_c4,
        stringprep.in_table_c5,
        stringprep.in_table_c6,
        stringprep.in_table_c7,
        stringprep.in_table_c8,
        stringprep.in_table_c9,
    )

    def saslprep(data: Any, prohibit_unassigned_code_points: Optional[bool] = True) -> Any:
        """An implementation of RFC4013 SASLprep.

        :param data: The string to SASLprep. Unicode strings
            (:class:`str`) are supported. Byte strings
            (:class:`bytes`) are ignored.
        :param prohibit_unassigned_code_points: True / False. RFC 3454
            and RFCs for various SASL mechanisms distinguish between
            `queries` (unassigned code points allowed) and
            `stored strings` (unassigned code points prohibited). Defaults
            to ``True`` (unassigned code points are prohibited).

        :return: The SASLprep'ed version of `data`.
        """
        prohibited: Any

        if not isinstance(data, str):
            return data

        if prohibit_unassigned_code_points:
            prohibited = (*_PROHIBITED, stringprep.in_table_a1)
        else:
            prohibited = _PROHIBITED

        # RFC3454 section 2, step 1 - Map
        # RFC4013 section 2.1 mappings
        # Map Non-ASCII space characters to SPACE (U+0020). Map
        # commonly mapped to nothing characters to, well, nothing.
        in_table_c12 = stringprep.in_table_c12
        in_table_b1 = stringprep.in_table_b1
        data = "".join(
            ["\u0020" if in_table_c12(elt) else elt for elt in data if not in_table_b1(elt)]
        )

        # RFC3454 section 2, step 2 - Normalize
        # RFC4013 section 2.2 normalization
        data = unicodedata.ucd_3_2_0.normalize("NFKC", data)

        in_table_d1 = stringprep.in_table_d1
        if in_table_d1(data[0]):
            if not in_table_d1(data[-1]):
                # RFC3454, Section 6, #3. If a string contains any
                # RandALCat character, the first and last characters
                # MUST be RandALCat characters.
                raise ValueError("SASLprep: failed bidirectional check")
            # RFC3454, Section 6, #2. If a string contains any RandALCat
            # character, it MUST NOT contain any LCat character.
            prohibited = (*prohibited, stringprep.in_table_d2)
        else:
            # RFC3454, Section 6, #3. Following the logic of #3, if
            # the first character is not a RandALCat, no other character
            # can be either.
            prohibited = (*prohibited, in_table_d1)

        # RFC3454 section 2, step 3 and 4 - Prohibit and check bidi
        for char in data:
            if any(in_table(char) for in_table in prohibited):
                raise ValueError("SASLprep: failed prohibited character check")

        return data