# Source code for patois

"""Utilities for writing code that runs on CPython, Jython, and other VMs."""
from __future__ import (print_function, absolute_import,
                        unicode_literals, division)
# Copyright (c) 2014 Hank Gay
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
__author__ = 'Hank Gay <hank@realultimateprogramming.com>'
__version__ = '0.1.0'


import os
import platform
import re


def is_jython():
    """Return True when the running interpreter is Jython.

    The decision is made by comparing ``platform.python_implementation()``
    against the string ``"Jython"``.
    """
    return platform.python_implementation() == "Jython"


# %-template for a regex character class matching code points that should
# not appear in text: the control characters U+0001-U+0008, U+000B,
# U+000E-U+001F and U+007F-U+009F, plus the noncharacters U+FDD0-U+FDEF and
# U+xxFFFE / U+xxFFFF of every plane.  The trailing "%s" slot lets a caller
# append further ranges (used below for the surrogate range).
invalid_unicode_template = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF%s]"

# ``unichr`` is a Python 2 / Jython builtin; Python 3 spells it ``chr``.
# Resolve it once so that importing this module does not raise NameError
# on Python 3.
try:
    _unichr = unichr
except NameError:
    _unichr = chr

if is_jython():
    # Jython is based on UTF-16, and as such, does not allow the use of
    # unmatched surrogates (U+D800-U+DFFF), in literals or otherwise.
    invalid_unicode_re = re.compile(invalid_unicode_template % "")
    # Building the lone-surrogate pattern would itself require creating
    # unmatched surrogates, so it is not built here.
    # scrub_invalid_unicode() returns before consulting it on Jython.
    replace_characters_regexp = None
else:
    # For those cases where unmatched surrogates can exist, we still can't
    # use them in a literal (because it would break Jython to scan them).
    # Instead use one extra step of indirection and create the surrogates
    # at runtime.
    invalid_unicode_re = re.compile(invalid_unicode_template % (
        "%s-%s" % (_unichr(0xD800), _unichr(0xDFFF)),))

    # Matches a lone surrogate: a high surrogate (U+D800-U+DBFF) that is not
    # followed by a low surrogate, or a low surrogate (U+DC00-U+DFFF) that
    # is not preceded by a high surrogate.
    replace_characters_regexp = re.compile(
        "([%s-%s](?![%s-%s])|(?<![%s-%s])[%s-%s])" % (
            _unichr(0xD800), _unichr(0xDBFF),
            _unichr(0xDC00), _unichr(0xDFFF),
            _unichr(0xD800), _unichr(0xDBFF),
            _unichr(0xDC00), _unichr(0xDFFF)))


def find_invalid_unicode(data):
    """Return every invalid character found in *data* as a list.

    The scan uses the module-level ``invalid_unicode_re``; because that
    pattern has no groups, each list item is the matched character itself.
    On Jython an empty list is always returned and *data* is not scanned.
    """
    # NOTE(review): a surrogate-free ``invalid_unicode_re`` is still compiled
    # on Jython but is bypassed here -- confirm that skipping the scan
    # entirely is the intended behaviour.
    if is_jython():
        return []
    return invalid_unicode_re.findall(data)


def find_invalid_unicode_iter(data):
    """Return an iterator over the invalid characters found in *data*.

    Off Jython this is ``invalid_unicode_re.finditer(data)``, so the
    iterator yields regex match objects.  On Jython an empty iterator is
    returned and *data* is not scanned.
    """
    if is_jython():
        return iter([])
    return invalid_unicode_re.finditer(data)


def scrub_invalid_unicode(data):
    """Return *data* with every lone surrogate replaced by U+FFFD.

    Lone surrogates are located with the module-level
    ``replace_characters_regexp``.  On Jython *data* is returned unchanged
    and the pattern is never consulted.
    """
    if is_jython():
        return data
    return replace_characters_regexp.sub("\ufffd", data)


def _is_ucs2():
    """Return True when U+10FFFF occupies more than one string element.

    Always returns False on Jython.
    """
    if is_jython():
        # Was ``false``, which is not a Python name and raised NameError.
        return False
    return len("\U0010FFFF") != 1


def _is_ucs4():
    """Return True when U+10FFFF occupies exactly one string element.

    Always returns False on Jython.
    """
    if is_jython():
        # Was ``false``, which is not a Python name and raised NameError.
        return False
    return len("\U0010FFFF") == 1


# Suffix of the files module_name_from_file_name() treats as Jython-compiled
# modules, e.g. ``foo$py.class``.
JYTHON_COMPILED_EXTENSION = "$py.class"


def module_name_from_file_name(file_name):
    """Figure out a module's name given the name of a compiled Python file.

    A trailing ``$py.class`` (matched case-insensitively) is removed as a
    whole; for any other name the last extension is dropped with
    ``os.path.splitext``.  Directory components, if present, are kept as-is.

    Returns None when *file_name* is empty or None.
    """
    if not file_name:
        return None

    # splitext() would only strip ".class" and leave "$py" behind, so the
    # Jython suffix has to be handled before the generic case.
    if file_name.lower().endswith(JYTHON_COMPILED_EXTENSION):
        return file_name[:-len(JYTHON_COMPILED_EXTENSION)]

    return os.path.splitext(file_name)[0]