'''
Created on 17 Jul 2017
@author: paulross
'''
import collections
import functools
# import inspect
import sys
import re
class TypesExceptionBase(Exception):
    """Base class for exceptions thrown by the types module."""
class FunctionTypesExceptionNoData(TypesExceptionBase):
    """Exception thrown when no call data has been added to a FunctionTypes object."""
@functools.total_ordering
class Type(object):
    """This class holds type information extracted from a single object.

    Non-container objects are recorded as their ``type()``. Containers are
    decomposed into ``Type`` objects:

    * ``list``: a list of the unique element types in order of encounter.
    * ``tuple``: a tuple with one type per element, a named tuple is rebuilt
      as an instance of its own class.
    * ``set``: a set of the element types.
    * ``dict``: a dict of ``{key_type : set(value_types), ...}``.
    """
    # Matches "<class 'int'>" to extract "int".
    # Enum classes print as "<enum 'Name'>"; newer Python versions print Flag
    # classes (re.RegexFlag for example) as "<flag 'Name'>" so accept that too.
    RE_TYPE_STR_MATCH = re.compile(r'<(?:class|enum|flag) \'(.+)\'>')

    def __init__(self, obj, __ids=None):
        """Constructor with an object. This decomposes the object into its
        types.

        ``__ids`` is used internally to prevent infinite recursion when, for
        example, a list contains itself. It is the set of ``id()`` values of
        the containers that are currently being decomposed, i.e. the ancestors
        of ``obj``. A container found in that set is recorded as its plain
        ``type()`` rather than being decomposed again.
        """
        self._type = None
        if __ids is None:
            __ids = set()
        else:
            assert isinstance(__ids, set)
            assert len(__ids) > 0
        obj_id = id(obj)
        if not isinstance(obj, (list, tuple, set, dict)) or obj_id in __ids:
            # Non-container, or a container that contains itself: just the
            # type() of the object.
            self._type = type(obj)
            return
        # Only containers on the current recursion path are tracked. Tracking
        # every object seen would wrongly treat a repeated or shared object,
        # such as both elements of (1, 1), as a recursive reference.
        __ids.add(obj_id)
        try:
            if isinstance(obj, list):
                # List: insert unique types only, this is ordered by encounter
                # but that is not regarded as significant.
                self._type = []
                for o in obj:
                    t = Type(o, __ids)
                    if t not in self._type:
                        self._type.append(t)
            elif isinstance(obj, tuple):
                member_types = [self._get_type(o, __ids) for o in obj]
                if hasattr(obj, '_fields'):
                    # Presume a named tuple, these take positional arguments.
                    self._type = type(obj)(*member_types)
                else:
                    # A plain tuple is built explicitly rather than by using
                    # type(obj) as that will cause __new__ to fail.
                    self._type = tuple(member_types)
            elif isinstance(obj, set):
                # Set: insert unique types only by virtue of type(obj).
                self._type = type(obj)([self._get_type(o, __ids) for o in obj])
            else:
                # Dict: make a dict {key_type : set(value_types), ...}
                self._type = {}
                for k, v in obj.items():
                    key = self._get_type(k, __ids)
                    val = self._get_type(v, __ids)
                    self._type.setdefault(key, set()).add(val)
        finally:
            # obj is no longer an ancestor of anything being decomposed.
            __ids.discard(obj_id)

    def _get_type(self, obj, __ids):
        """Returns the type of a contained object as a ``Type`` object.

        ``__ids`` is the set of ``id()`` values of the enclosing containers.
        """
        return Type(obj, __ids)

    def __eq__(self, other):
        # other could be a type object not just a Type object
        if hasattr(other, '_type'):
            return self._type == other._type
        return False

    def __lt__(self, other):
        """Orders by the string representation, this is deterministic even
        for container types."""
        if not hasattr(other, '_type'):
            return NotImplemented
        return str(self) < str(other)

    def __hash__(self):
        return hash(str(self))

    def __str__(self):
        """Returns the type as a string such as ``int``, ``list([int, str])``
        or ``dict({str : [int]})``."""
        if isinstance(self._type, (list, set, tuple)):
            if isinstance(self._type, tuple):
                # Tuple, maintain order of types
                members = [str(t) for t in self._type]
            else:
                # List, set, unordered types
                members = sorted(str(t) for t in self._type)
            return '{:s}([{:s}])'.format(
                Type.str_of_object_type(self._type),
                ', '.join(members),
            )
        if isinstance(self._type, dict):
            # Sorted so that the string, and hence the hash, does not depend
            # on insertion order as equal dicts must give equal hashes.
            entries = sorted(
                (str(k), '[' + ', '.join(sorted(str(_v) for _v in v)) + ']')
                for k, v in self._type.items()
            )
            return '{:s}({{{:s}}})'.format(
                Type.str_of_object_type(self._type),
                ', '.join('{:s} : {:s}'.format(k, v) for k, v in entries),
            )
        return '{!s:s}'.format(self.str_of_type(self._type))

    @classmethod
    def str_of_type(cls, typ):
        """Returns the name of a type, for example ``int`` given ``int``.

        :raises: ``ValueError`` if ``str(typ)`` can not be parsed.
        """
        m = Type.RE_TYPE_STR_MATCH.match(str(typ))
        if m is not None:
            return m.group(1)
        raise ValueError('Can not parse type: "{:s}"'.format(str(typ)))

    @classmethod
    def str_of_object_type(cls, obj):
        """Returns the name of the type of an object, for example ``int``
        given ``1``.

        :raises: ``ValueError`` if ``str(type(obj))`` can not be parsed.
        """
        m = Type.RE_TYPE_STR_MATCH.match(str(type(obj)))
        if m is not None:
            return m.group(1)
        raise ValueError('Can not parse object: "{:s}", type {:s}'.format(str(obj), str(type(obj))))
class FunctionTypes:
    """Class that accumulate function call data such as call arguments,
    return values and exceptions raised. The accumulated data can be rendered
    as a stub file entry or as a skeleton docstring."""
    # Translate type names into typing parlance
    TYPE_NAME_TRANSLATION = {
        '_io.BytesIO' : 'IO[bytes]',
        # StringIO is a text stream so it is IO[str], not IO[bytes].
        '_io.StringIO' : 'IO[str]',
        'NoneType' : 'None',
    }
    SELF = 'self'
    # Alphabetical order
    DOCSTRING_STYLES_AVAILABLE = tuple(sorted(('sphinx', 'google')))

    def __init__(self, signature=None):
        """Constructor, merely initialises internal state.

        :param signature: Stored as ``self.signature`` and reported by
            ``__repr__``, expected to be an ``inspect.Signature`` object.
        """
        super().__init__()
        # An inspect.Signature object.
        self.signature = signature
        # TODO: Track a range of line numbers.
        # 'call' must be always the same line number
        # Since functions can not overlap the 'return' shows function bounds
        #
        # OrderedDict of {argument_name : set(types.Type), ...}
        self.arguments = collections.OrderedDict()
        # dict of {line_number : set(types.Type), ...}
        self.return_types = {}
        # TODO: Store the id() of the exception so that we can track how
        # its arc through the stack.
        # Something like {line : (types.Type, set(id...)), ...}
        # On reflection, probably not as id() values might get reused.
        #
        # dict of {line_number : set(types.Type), ...}
        self._exception_types = {}
        # There should be at least one of these, possibly others for generators
        # where yield is a re-entry point.
        # The [0] element is the first entry point seen, the others are in
        # order of first encounter.
        self.call_line_numbers = []
        # Line numbers:
        # No general sanity check is possible on the ordering of line numbers
        # since property setters and getters can be called in any order.
        # Generators have a call site at declaration and each yield statement
        # Smallest seen line number
        self.min_line_number = sys.maxsize
        # Largest seen line number
        self.max_line_number = 0
        # TODO: Track call/return type pairs so we can use the @overload
        # decorator in the .pyi files.
        self.DOCSTRING_STYLE_FUNCTIONS = {
            'sphinx' : self._docstring_sphinx,
            'google' : self._docstring_google,
        }
        # Internal consistency check of the two style tables.
        keys = tuple(sorted(self.DOCSTRING_STYLE_FUNCTIONS.keys()))
        assert keys == self.DOCSTRING_STYLES_AVAILABLE

    def __repr__(self):
        """Dump of the internal representation."""
        def _str_list_add_dict(title, d, l):
            sub_l = ['{:s}:'.format(title)]
            if len(d):
                for k, v in d.items():
                    sub_l.append('{!r:s} -> {!r:s}'.format(k, v))
            else:
                sub_l.append('N/A')
            l.append(' '.join(sub_l))

        str_l = []
        _str_list_add_dict('Argument types', self.argument_type_strings, str_l)
        _str_list_add_dict('Return types', self.return_type_strings, str_l)
        _str_list_add_dict('Exceptions', self.exception_type_strings, str_l)
        str_l.append('Entry points: {!r:s}'.format(self.call_line_numbers))
        str_l.append('Signature: {!s:s}'.format(self.signature))
        return ', '.join(str_l)

    def _stringify_dict_of_set(self, dofs):
        """Returns a new mapping, of the same class as ``dofs``, with every
        member of every value set converted to a string."""
        ret = type(dofs)()
        for k, v in dofs.items():
            ret[k] = set([str(t) for t in v])
        return ret

    @staticmethod
    def _union_of_values(dict_of_sets):
        """Returns a new set that is the union of all the values of a dict of
        sets."""
        merged = set()
        for values in dict_of_sets.values():
            merged |= values
        return merged

    def _update_line_range(self, line_number):
        """Widens the seen line number range to include ``line_number``."""
        # No general sanity check is possible on the ordering of line numbers
        # since property setters and getters appear as the same
        # function and can be called in any order.
        # Generators have a call site at declaration and each yield
        # statement
        self.min_line_number = min(self.min_line_number, line_number)
        self.max_line_number = max(self.max_line_number, line_number)

    @property
    def argument_type_strings(self):
        """A ``collections.OrderedDict`` of
        ``{argument_name : set(types, ...), ...}`` where the types are strings."""
        return self._stringify_dict_of_set(self.arguments)

    @property
    def return_type_strings(self):
        """A dict of ``{line_number : set(types, ...), ...}`` for the return
        values where the return types are strings.
        There should only be one type in the set."""
        return self._stringify_dict_of_set(self.return_types)

    @property
    def exception_type_strings(self):
        """A dict of ``{line_number : set(types, ...), ...}`` for any exceptions
        raised where the exception types are strings.
        There should only be one type in the set."""
        return self._stringify_dict_of_set(self._exception_types)

    @property
    def num_entry_points(self):
        """The number of entry points, 1 for normal functions >1 for generators. 0 Something wrong."""
        return len(self.call_line_numbers)

    @property
    def line_decl(self):
        """Line number of the function declaration as an integer.

        :returns: ``int`` -- Function declaration line.

        :raises: ``FunctionTypesExceptionNoData`` If there is no entry points recorded.
        """
        if len(self.call_line_numbers) == 0:
            raise FunctionTypesExceptionNoData()
        return self.call_line_numbers[0]

    @property
    def line_range(self):
        """A pair of line numbers of the span of the function as integers.
        The first is the smallest line number seen, the last is the largest
        line number seen from any call, return point or exception.

        :raises: ``FunctionTypesExceptionNoData`` If there is no entry points recorded.
        """
        if len(self.call_line_numbers) == 0:
            raise FunctionTypesExceptionNoData()
        return self.min_line_number, self.max_line_number

    #---- Data acquisition. ----
    def add_call(self, arg_info, file_path, line_number):
        """Adds a function call from the frame.

        :param arg_info: The ``.args`` names are looked up in ``.locals`` to
            find the argument values.
        :param file_path: Not used by this method.
        :param line_number: The line of the entry point.
        """
        # arg_info is an ArgInfo object which is a named tuple from
        # inspect.getargvalues(frame):
        # ArgInfo(args, varargs, keywords, locals):
        # args - list of names as strings.
        # varargs - name entry in the locals for *args or None.
        # keywords - name entry in the locals for *kwargs or None.
        # locals - dict of {name : value, ...} of arguments.
        for arg in arg_info.args:
            t = Type(arg_info.locals[arg])
            self.arguments.setdefault(arg, set()).add(t)
        # The first call, or a new entry point for yield statements.
        if line_number not in self.call_line_numbers:
            self.call_line_numbers.append(line_number)
        self._update_line_range(line_number)

    def add_return(self, return_value, line_number):
        """Records a return value at a particular line number.
        If the return_value is None and we have previously seen an exception at
        this line then this is a phantom return value and must be ignored.
        See ``TypeInferencer.__enter__`` for a description of this.
        """
        if return_value is None and line_number in self._exception_types:
            # Ignore phantom return value of None immediately after an exception
            return
        self.return_types.setdefault(line_number, set()).add(Type(return_value))
        self._update_line_range(line_number)

    def add_exception(self, exception, line_number):
        """Records the type of an exception seen at a particular line number."""
        self._exception_types.setdefault(line_number, set()).add(Type(exception))
        self._update_line_range(line_number)
    #---- END: Data acquisition. ----

    def has_self_first_arg(self):
        """Returns True if 'self' is the first argument i.e. I am a method."""
        return next(iter(self.arguments), None) == self.SELF

    def types_of_self(self):
        """Returns the set of types (as strings) as seen for the type of 'self'.
        Returns None if 'self' is not the first argument i.e. I am not a method.
        """
        if self.has_self_first_arg():
            return self.argument_type_strings[self.SELF]
        return None

    def filtered_arguments(self):
        """A ``collections.OrderedDict`` of
        ``{argument_name : set(types, ...), ...}`` where the types are strings.
        This removes the 'self' argument if it is the first argument."""
        # This is a fresh mapping so deleting from it does not alter my state.
        arg_types = self.argument_type_strings
        if self.has_self_first_arg():
            del arg_types[self.SELF]
        return arg_types

    def __str__(self):
        """Returns something like the annotation string."""
        sl = ['type:']
        for arg, types in self.arguments.items():
            # Sort the names so that the output is deterministic.
            type_names = sorted(str(t) for t in types)
            if len(type_names) == 1:
                sl.append('({:s} {:s})'.format(arg, type_names[0]))
            else:
                sl.append('({:s} {!r:s})'.format(arg, ', '.join(type_names)))
        # self.return_types is a dict of {line_number : set(types.Type), ...}
        return_types = self._union_of_values(self.return_types)
        if len(return_types) == 0:
            sl.append('-> None')
        elif len(return_types) == 1:
            sl.append('-> {:s}'.format(str(return_types.pop())))
        else:
            # Sorted as set iteration order is arbitrary.
            sl.append('-> Union[{:s}]'.format(
                ', '.join(sorted(self._type(str(t)) for t in return_types)))
            )
        return ' '.join(sl)

    def _type(self, name):
        """Translates a type name if necessary."""
        return self.TYPE_NAME_TRANSLATION.get(name, name)

    def _annotation(self, types):
        """Returns the annotation for a non-empty collection of types: the
        translated name if there is only one, otherwise a sorted ``Union``."""
        names = sorted(set(self._type(str(t)) for t in types))
        if len(names) == 1:
            return names[0]
        return 'Union[{:s}]'.format(', '.join(names))

    def stub_file_str(self):
        """A string suitable for writing to a stub file.
        Example::

            def encodebytes(s: bytes) -> bytes: ...

        This returns the part from the opening parenthesis onwards. An
        argument or return value seen with several types is written as a
        ``Union``.
        """
        arg_str_list = []
        for arg_name, types in self.arguments.items():
            if arg_name == self.SELF or len(types) == 0:
                # 'self', or nothing known about the argument: no annotation.
                arg_str_list.append(arg_name)
            else:
                arg_str_list.append(
                    '{:s}: {:s}'.format(arg_name, self._annotation(types))
                )
        # self.return_types is a dict of {line_number : set(types.Type), ...}
        return_types = self._union_of_values(self.return_types)
        if len(return_types) == 0:
            return_str = 'None'
        else:
            return_str = self._annotation(return_types)
        return '({:s}) -> {:s}: ...'.format(', '.join(arg_str_list), return_str)

    def _insert_doc_marker(self, suffix):
        """Returns a placeholder for documentation that has to be written by
        hand, spaces are replaced by underscores."""
        return '<insert documentation for {:s}>'.format(suffix).replace(' ', '_')

    def _docstring_sphinx(self, include_returns):
        """Returns as string that is the function documentation in the Sphinx
        style. If include_returns is True then the return value documentation
        is included. If false it is excluded, this is used for functions that
        have no return value, __init__() for example.
        Example: https://pythonhosted.org/an_example_pypi_project/sphinx.html
        "def public_fn_with_sphinxy_docstring(name, state=None):"

        :param include_returns: Whether to include documentation of the return
            value.
        :type include_returns: ``bool``
        """
        str_l = ['"""']
        str_l.append(self._insert_doc_marker('function'))
        for arg, types in self.filtered_arguments().items():
            str_l.append('')
            str_l.append(':param {:s}: {:s}'.format(
                arg,
                self._insert_doc_marker('argument'))
            )
            str_l.append(':type {:s}: ``{:s}``'.format(arg, ', '.join(sorted(types))))
        if include_returns:
            str_l.append('')
            # Returns
            return_types = self._union_of_values(self.return_type_strings)
            # :returns: int -- the return code.
            str_return_types = ','.join(sorted(return_types))
            if str_return_types == 'NoneType':
                str_l.append(':returns: ``{:s}``'.format(str_return_types))
            else:
                str_l.append(
                    ':returns: ``{:s}`` -- {:s}'.format(
                        str_return_types,
                        self._insert_doc_marker('return values'),
                    )
                )
        # Exceptions, optional
        if len(self._exception_types) > 0:
            str_l.append('')
            excepts = self._union_of_values(self.exception_type_strings)
            str_l.append(':raises: ``{:s}``'.format(', '.join(sorted(excepts))))
        str_l.append('"""')
        return '\n'.join(str_l)

    def _docstring_google(self, include_returns):
        """Returns as string that is the function documentation in the Google
        style. If include_returns is True then the return value documentation
        is included. If false it is excluded, this is used for functions that
        have no return value, __init__() for example.
        Example: https://pythonhosted.org/an_example_pypi_project/sphinx.html
        "def public_fn_with_googley_docstring(name, state=None):"

        :param include_returns: Whether to include documentation of the return
            value.
        :type include_returns: ``bool``
        """
        str_l = ['"""']
        str_l.append(self._insert_doc_marker('function'))
        args_types = self.filtered_arguments()
        if len(args_types) > 0:
            str_l.append('')
            str_l.append('Args:')
            for arg, types in args_types.items():
                str_l.append(' {:s} ({:s}): {:s}'.format(
                    arg,
                    ', '.join(sorted(types)),
                    self._insert_doc_marker('argument'))
                )
        if include_returns:
            str_l.append('')
            str_l.append('Returns:')
            # Returns
            return_types = self._union_of_values(self.return_type_strings)
            str_return_types = ','.join(sorted(return_types))
            if str_return_types == 'NoneType':
                str_l.append(' {:s}'.format(str_return_types))
            else:
                str_l.append(
                    ' {:s}. {:s}'.format(
                        str_return_types,
                        self._insert_doc_marker('return values'),
                    )
                )
        # Exceptions, optional
        if len(self._exception_types) > 0:
            str_l.append('')
            str_l.append('Raises:')
            excepts = self._union_of_values(self.exception_type_strings)
            str_l.append(' {:s}'.format(', '.join(sorted(excepts))))
        str_l.append('"""')
        return '\n'.join(str_l)

    def docstring(self, include_returns, style='sphinx'):
        """Returns a pair (line_number, docstring) for this function. The
        docstring is the __doc__ for the function and the line_number is the
        docstring position (function declaration + 1).
        So to insert into a list of lines called ``src``::

            src[:line_number] + docstring.split('\\n') + src[line_number:]

        style can be: 'sphinx', 'google'.

        :raises: ``ValueError`` if the style is not supported.
        :raises: ``TypesExceptionBase`` or derived class.
        """
        if style not in self.DOCSTRING_STYLE_FUNCTIONS:
            raise ValueError(
                'Style {!s:s} not supported, must be one of {!r:s}'.format(
                    style,
                    list(self.DOCSTRING_STYLE_FUNCTIONS.keys())
                )
            )
        return self.line_decl, self.DOCSTRING_STYLE_FUNCTIONS[style](include_returns)