Source code for patois

"""Utilities for writing code that runs on CPython, Jython, and other VMs."""
from __future__ import (print_function, absolute_import,
                        unicode_literals, division)
# Copyright (c) 2014 Hank Gay
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# Package metadata: author contact and the release version string.
__author__ = 'Hank Gay <hank@realultimateprogramming.com>'
__version__ = '0.1.0'


import os
import platform
import re


def is_jython():
    """Report whether the running interpreter identifies itself as Jython.

    Returns:
        bool: True when ``platform.python_implementation()`` is exactly
        ``"Jython"``, False for every other implementation name.
    """
    implementation = platform.python_implementation()
    return implementation == "Jython"
# Python 2 and Jython spell the "code point -> text" builtin ``unichr``;
# Python 3 removed it because ``chr`` already returns text.  Resolve the
# right callable once so this module can be imported on either line.
try:
    _unichr = unichr
except NameError:
    _unichr = chr

# Character class matching control characters and Unicode noncharacters.
# The trailing ``%s`` is a slot for an optional surrogate range, filled in
# below depending on what the running VM allows.
invalid_unicode_template = (
    "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF"
    "\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF"
    "\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF"
    "\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF"
    "\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF"
    "\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF"
    "\U0010FFFE\U0010FFFF%s]"
)

if is_jython():
    # Jython is based on UTF-16, and as such, does not allow the use of
    # unmatched surrogate pairs (\uD800-\uDFFF), in literals or otherwise.
    invalid_unicode_re = re.compile(invalid_unicode_template % "")
else:
    # For those cases where unmatched surrogate pairs can exist, we still
    # can't use them in a literal (because it would break Jython to scan
    # them).  Instead use one extra step of indirection and create the
    # surrogates at runtime from their code points.
    invalid_unicode_re = re.compile(invalid_unicode_template % (
        "%s-%s" % (_unichr(0xD800), _unichr(0xDFFF)),))

    # Matches a high surrogate not followed by a low surrogate, or a low
    # surrogate not preceded by a high surrogate (i.e. any unpaired one).
    replace_characters_regexp = re.compile(
        "([%s-%s](?![%s-%s])|(?<![%s-%s])[%s-%s])" % (
            _unichr(0xD800), _unichr(0xDBFF),
            _unichr(0xDC00), _unichr(0xDFFF),
            _unichr(0xD800), _unichr(0xDBFF),
            _unichr(0xDC00), _unichr(0xDFFF)))
def find_invalid_unicode(data):
    """Collect every match of ``invalid_unicode_re`` in ``data``.

    On Jython no scan is performed and an empty list is returned.

    Args:
        data: The text to scan.

    Returns:
        list: The result of ``invalid_unicode_re.findall(data)``, or ``[]``
        when running on Jython.
    """
    if not is_jython():
        return invalid_unicode_re.findall(data)
    return []
def find_invalid_unicode_iter(data):
    """Lazily iterate over the matches of ``invalid_unicode_re`` in ``data``.

    On Jython no scan is performed and an empty iterator is returned.

    Args:
        data: The text to scan.

    Returns:
        An iterator of match objects from ``invalid_unicode_re.finditer``,
        or an iterator over an empty list when running on Jython.
    """
    if is_jython():
        no_matches = []
        return iter(no_matches)
    return invalid_unicode_re.finditer(data)
def scrub_invalid_unicode(data):
    """Replace each match of ``replace_characters_regexp`` with U+FFFD.

    On Jython the input is handed back untouched.

    Args:
        data: The text to clean.

    Returns:
        ``data`` with every regexp match substituted by the replacement
        character, or ``data`` itself when running on Jython.
    """
    if not is_jython():
        replacement = "\ufffd"
        return replace_characters_regexp.sub(replacement, data)
    return data
def _is_ucs2():
    """Tell whether one code point above the BMP takes more than one unit.

    The check measures the length of a literal holding the single code
    point U+10FFFF; any length other than 1 means the interpreter splits
    it into several code units.

    Returns:
        bool: Always False on Jython; otherwise True when that literal's
        length is not 1.
    """
    if is_jython():
        # Was ``return false`` -- an undefined name that raised NameError.
        return False
    return len("\U0010FFFF") != 1


def _is_ucs4():
    """Tell whether one code point above the BMP is stored as one unit.

    Returns:
        bool: Always False on Jython; otherwise True when a literal holding
        the single code point U+10FFFF has length 1.
    """
    if is_jython():
        # Was ``return false`` -- an undefined name that raised NameError.
        return False
    return len("\U0010FFFF") == 1


# File-name suffix recognised (and stripped) by module_name_from_file_name
# as marking a Jython-compiled module.
JYTHON_COMPILED_EXTENSION = "$py.class"
def module_name_from_file_name(file_name):
    """Figure out a module's name given the name of a compiled Python file.

    Args:
        file_name: Name of the compiled file; may be empty or None.

    Returns:
        None when ``file_name`` is falsy.  If the name ends with
        ``JYTHON_COMPILED_EXTENSION`` (compared case-insensitively), the
        name with that many trailing characters removed.  Otherwise the
        name with its final extension removed via ``os.path.splitext``.
    """
    if not file_name:
        return None

    lowered = file_name.lower()
    if not lowered.endswith(JYTHON_COMPILED_EXTENSION):
        stem, _extension = os.path.splitext(file_name)
        return stem

    # Slice the original (not the lowered) name so its casing is preserved.
    suffix_length = len(JYTHON_COMPILED_EXTENSION)
    return file_name[:-suffix_length]