Merge pull request #169 from kylemanna/python3

[apps/agl-service-can-low-level.git] / generator / nanopb_generator.py
diff --git a/generator/nanopb_generator.py b/generator/nanopb_generator.py

index 6d06c2c..7fe0db9 100755 (executable)
--- a/generator/nanopb_generator.py
+++ b/generator/nanopb_generator.py
@@ -1,10 +1,13 @@
-#!/usr/bin/python
+#!/usr/bin/env python
+
+from __future__ import unicode_literals
  
  '''Generate header file for nanopb from a ProtoBuf FileDescriptorSet.'''
-nanopb_version = "nanopb-0.3.4-dev"
+nanopb_version = "nanopb-0.3.5-dev"
  
  import sys
  import re
+from functools import reduce
  
  try:
      # Add some dummy imports to keep packaging tools happy.
@@ -82,7 +85,14 @@ class Names:
          return '_'.join(self.parts)
  
      def __add__(self, other):
-        if isinstance(other, (str, unicode)):
+        # The fdesc names are unicode and need to be handled for
+        # python2 and python3
+        try:
+              realstr = unicode
+        except NameError:
+              realstr = str
+
+        if isinstance(other, realstr):
              return Names(self.parts + (other,))
          elif isinstance(other, tuple):
              return Names(self.parts + other)
@@ -100,11 +110,14 @@ def names_from_type_name(type_name):
  
  def varint_max_size(max_value):
      '''Returns the maximum number of bytes a varint can take when encoded.'''
+    if max_value < 0:
+        max_value = 2**64 - max_value
      for i in range(1, 11):
          if (max_value >> (i * 7)) == 0:
              return i
      raise ValueError("Value too large for varint: " + str(max_value))
  
+assert varint_max_size(-1) == 10
  assert varint_max_size(0) == 1
  assert varint_max_size(127) == 1
  assert varint_max_size(128) == 2
@@ -120,7 +133,7 @@ class EncodedSize:
          self.symbols = symbols
      
      def __add__(self, other):
-        if isinstance(other, (int, long)):
+        if isinstance(other, int):
              return EncodedSize(self.value + other, self.symbols)
          elif isinstance(other, (str, Names)):
              return EncodedSize(self.value, self.symbols + [str(other)])
@@ -130,7 +143,7 @@ class EncodedSize:
              raise ValueError("Cannot add size: " + repr(other))
  
      def __mul__(self, other):
-        if isinstance(other, (int, long)):
+        if isinstance(other, int):
              return EncodedSize(self.value * other, [str(other) + '*' + s for s in self.symbols])
          else:
              raise ValueError("Cannot multiply size: " + repr(other))
@@ -168,6 +181,9 @@ class Enum:
                  return True
          return False
      
+    def encoded_size(self):  # largest varint_max_size() over this enum's (name, value) pairs
+        return max(varint_max_size(value) for _name, value in self.values)
+    
      def __str__(self):
          result = 'typedef enum _%s {\n' % self.names
          result += ',\n'.join(["    %s = %d" % x for x in self.values])
@@ -186,6 +202,24 @@ class Enum:
          
          return result
  
+class FieldMaxSize:  # tracks the largest value, the field it came from, and accumulated check expressions
+    def __init__(self, worst = 0, checks = [], field_name = 'undefined'):
+        if isinstance(worst, list):
+            self.worst = max(i for i in worst if i is not None)  # None entries are skipped
+        else:
+            self.worst = worst
+
+        self.worst_field = field_name
+        self.checks = list(checks)  # own copy: extend() mutates it, and the default [] is shared by all calls
+
+    def extend(self, extend, field_name = None):
+        self.worst = max(self.worst, extend.worst)
+
+        if self.worst == extend.worst:  # incoming value is at least as large: adopt its field name
+            self.worst_field = extend.worst_field
+
+        self.checks.extend(extend.checks)
  class Field:
      def __init__(self, struct_name, desc, field_options):
          '''desc is FieldDescriptorProto'''
@@ -254,7 +288,7 @@ class Field:
              raise NotImplementedError(field_options.type)
          
          # Decide the C data type to use in the struct.
-        if datatypes.has_key(desc.type):
+        if desc.type in datatypes:
              self.ctype, self.pbtype, self.enc_size, isa = datatypes[desc.type]
  
              # Override the field size if user wants to use smaller integers
@@ -267,7 +301,7 @@ class Field:
              self.ctype = names_from_type_name(desc.type_name)
              if self.default is not None:
                  self.default = self.ctype + self.default
-            self.enc_size = 5 # protoc rejects enum values > 32 bits
+            self.enc_size = None # Needs to be filled in when enum values are known
          elif desc.type == FieldD.TYPE_STRING:
              self.pbtype = 'STRING'
              self.ctype = 'char'
@@ -289,8 +323,8 @@ class Field:
          else:
              raise NotImplementedError(desc.type)
          
-    def __cmp__(self, other):
-        return cmp(self.tag, other.tag)
+    def __lt__(self, other):  # orders fields by tag number; takes over from the removed __cmp__
+        return self.tag < other.tag
      
      def __str__(self):
          result = ''
@@ -354,12 +388,10 @@ class Field:
                  inner_init = '0'
          else:
              if self.pbtype == 'STRING':
-                inner_init = self.default.encode('utf-8').encode('string_escape')
-                inner_init = inner_init.replace('"', '\\"')
+                inner_init = self.default.replace('"', '\\"')
                  inner_init = '"' + inner_init + '"'
              elif self.pbtype == 'BYTES':
-                data = str(self.default).decode('string_escape')
-                data = ['0x%02x' % ord(c) for c in data]
+                data = ['0x%02x' % ord(c) for c in self.default]
                  if len(data) == 0:
                      inner_init = '{0, {0}}'
                  else:
@@ -461,15 +493,18 @@ class Field:
      def largest_field_value(self):
          '''Determine if this field needs 16bit or 32bit pb_field_t structure to compile properly.
          Returns numeric value or a C-expression for assert.'''
+        check = []
          if self.pbtype == 'MESSAGE':
              if self.rules == 'REPEATED' and self.allocation == 'STATIC':
-                return 'pb_membersize(%s, %s[0])' % (self.struct_name, self.name)
+                check.append('pb_membersize(%s, %s[0])' % (self.struct_name, self.name))
              elif self.rules == 'ONEOF':
-                return 'pb_membersize(%s, %s.%s)' % (self.struct_name, self.union_name, self.name)
+                check.append('pb_membersize(%s, %s.%s)' % (self.struct_name, self.union_name, self.name))
              else:
-                return 'pb_membersize(%s, %s)' % (self.struct_name, self.name)
+                check.append('pb_membersize(%s, %s)' % (self.struct_name, self.name))
  
-        return max(self.tag, self.max_size, self.max_count)        
+        return FieldMaxSize([self.tag, self.max_size, self.max_count],
+                            check,
+                            ('%s.%s' % (self.struct_name, self.name)))
  
      def encoded_size(self, dependencies):
          '''Return the maximum size that this field can take when encoded,
@@ -490,7 +525,8 @@ class Field:
                  encsize += varint_max_size(encsize.upperlimit())
              else:
                  # Submessage cannot be found, this currently occurs when
-                # the submessage type is defined in a different file.
+                # the submessage type is defined in a different file and
+                # not using the protoc plugin.
                  # Instead of direct numeric value, reference the size that
                  # has been #defined in the other file.
                  encsize = EncodedSize(self.submsgname + 'size')
@@ -499,6 +535,14 @@ class Field:
                  # prefix size, though.
                  encsize += 5
  
+        elif self.pbtype in ['ENUM', 'UENUM']:
+            if str(self.ctype) in dependencies:
+                enumtype = dependencies[str(self.ctype)]
+                encsize = enumtype.encoded_size()
+            else:
+                # Conservative assumption
+                encsize = 10
+
          elif self.enc_size is None:
              raise RuntimeError("Could not determine encoded size for %s.%s"
                                 % (self.struct_name, self.name))
@@ -544,7 +588,7 @@ class ExtensionRange(Field):
      def tags(self):
          return ''
      
-    def encoded_size(self, allmsgs):
+    def encoded_size(self, dependencies):
          # We exclude extensions from the count, because they cannot be known
          # until runtime. Other option would be to return None here, but this
          # way the value remains useful if extensions are not used.
@@ -624,9 +668,6 @@ class OneOf(Field):
          # Sort by the lowest tag number inside union
          self.tag = min([f.tag for f in self.fields])
  
-    def __cmp__(self, other):
-        return cmp(self.tag, other.tag)
-
      def __str__(self):
          result = ''
          if self.fields:
@@ -660,12 +701,15 @@ class OneOf(Field):
          return result
  
      def largest_field_value(self):
-        return max([f.largest_field_value() for f in self.fields])
+        largest = FieldMaxSize()
+        for f in self.fields:
+            largest.extend(f.largest_field_value())
+        return largest
  
-    def encoded_size(self, allmsgs):
+    def encoded_size(self, dependencies):
          largest = EncodedSize(0)
          for f in self.fields:
-            size = f.encoded_size(allmsgs)
+            size = f.encoded_size(dependencies)
              if size is None:
                  return None
              elif size.symbols:
@@ -812,13 +856,13 @@ class Message:
          result += '    PB_LAST_FIELD\n};'
          return result
  
-    def encoded_size(self, allmsgs):
+    def encoded_size(self, dependencies):
          '''Return the maximum size that this message can take when encoded.
          If the size cannot be determined, returns None.
          '''
          size = EncodedSize(0)
          for field in self.fields:
-            fsize = field.encoded_size(allmsgs)
+            fsize = field.encoded_size(dependencies)
              if fsize is None:
                  return None
              size += fsize
@@ -860,17 +904,17 @@ def toposort2(data):
      From http://code.activestate.com/recipes/577413-topological-sort/
      This function is under the MIT license.
      '''
-    for k, v in data.items():
+    for k, v in list(data.items()):
          v.discard(k) # Ignore self dependencies
-    extra_items_in_deps = reduce(set.union, data.values(), set()) - set(data.keys())
+    extra_items_in_deps = reduce(set.union, list(data.values()), set()) - set(data.keys())
      data.update(dict([(item, set()) for item in extra_items_in_deps]))
      while True:
-        ordered = set(item for item,dep in data.items() if not dep)
+        ordered = set(item for item,dep in list(data.items()) if not dep)
          if not ordered:
              break
          for item in sorted(ordered):
              yield item
-        data = dict([(item, (dep - ordered)) for item,dep in data.items()
+        data = dict([(item, (dep - ordered)) for item,dep in list(data.items())
                  if item not in ordered])
      assert not data, "A cyclic dependency exists amongst %r" % data
  
@@ -1121,20 +1165,17 @@ class ProtoFile:
                  yield '#error Properly detecting missing required fields in %s requires \\\n' % largest_msg.name
                  yield '       setting PB_MAX_REQUIRED_FIELDS to %d or more.\n' % largest_count
                  yield '#endif\n'
-        
-        worst = 0
-        worst_field = ''
-        checks = []
+
+        max_field = FieldMaxSize()
          checks_msgnames = []
          for msg in self.messages:
              checks_msgnames.append(msg.name)
              for field in msg.fields:
-                status = field.largest_field_value()
-                if isinstance(status, (str, unicode)):
-                    checks.append(status)
-                elif status > worst:
-                    worst = status
-                    worst_field = str(field.struct_name) + '.' + str(field.name)
+                max_field.extend(field.largest_field_value())
+
+        worst = max_field.worst
+        worst_field = max_field.worst_field
+        checks = max_field.checks
  
          if worst > 255 or checks:
              yield '\n/* Check that field information fits in pb_field_t */\n'
@@ -1222,7 +1263,7 @@ def read_options_file(infile):
          
          try:
              text_format.Merge(parts[1], opts)
-        except Exception, e:
+        except Exception as e:
              sys.stderr.write("%s:%d: " % (infile.name, i + 1) +
                               "Unparseable option line: '%s'. " % line +
                               "Error: %s\n" % str(e))
@@ -1290,6 +1331,9 @@ optparser.add_option("-e", "--extension", dest="extension", metavar="EXTENSION",
      help="Set extension to use instead of '.pb' for generated files. [default: %default]")
  optparser.add_option("-f", "--options-file", dest="options_file", metavar="FILE", default="%s.options",
      help="Set name of a separate generator options file.")
+optparser.add_option("-I", "--options-path", dest="options_path", metavar="DIR",
+    action="append", default = [],
+    help="Search for .options files additionally in this path")
  optparser.add_option("-Q", "--generated-include-format", dest="genformat",
      metavar="FORMAT", default='#include "%s"\n',
      help="Set format string to use for including other .pb.h files. [default: %default]")
@@ -1305,19 +1349,8 @@ optparser.add_option("-v", "--verbose", dest="verbose", action="store_true", def
  optparser.add_option("-s", dest="settings", metavar="OPTION:VALUE", action="append", default=[],
      help="Set generator option (max_size, max_count etc.).")
  
-def process_file(filename, fdesc, options):
-    '''Process a single file.
-    filename: The full path to the .proto or .pb source file, as string.
-    fdesc: The loaded FileDescriptorSet, or None to read from the input file.
-    options: Command line options as they come from OptionsParser.
-    
-    Returns a dict:
-        {'headername': Name of header file,
-         'headerdata': Data for the .h header file,
-         'sourcename': Name of the source code file,
-         'sourcedata': Data for the .c source code file
-        }
-    '''
+def parse_file(filename, fdesc, options):
+    '''Parse a single file. Returns a ProtoFile instance.'''
      toplevel_options = nanopb_pb2.NanoPBOptions()
      for s in options.settings:
          text_format.Merge(s, toplevel_options)
@@ -1335,18 +1368,20 @@ def process_file(filename, fdesc, options):
          optfilename = options.options_file
          had_abspath = True
  
-    if os.path.isfile(optfilename):
-        if options.verbose:
-            sys.stderr.write('Reading options from ' + optfilename + '\n')
-
-        Globals.separate_options = read_options_file(open(optfilename, "rU"))
+    paths = ['.'] + options.options_path
+    for p in paths:
+        if os.path.isfile(os.path.join(p, optfilename)):
+            optfilename = os.path.join(p, optfilename)
+            if options.verbose:
+                sys.stderr.write('Reading options from ' + optfilename + '\n')
+            Globals.separate_options = read_options_file(open(optfilename, "rU"))
+            break
      else:
          # If we are given a full filename and it does not exist, give an error.
          # However, don't give error when we automatically look for .options file
          # with the same name as .proto.
          if options.verbose or had_abspath:
-            sys.stderr.write('Options file not found: ' + optfilename)
-
+            sys.stderr.write('Options file not found: ' + optfilename + '\n')
          Globals.separate_options = []
  
      Globals.matched_namemasks = set()
@@ -1354,6 +1389,29 @@ def process_file(filename, fdesc, options):
      # Parse the file
      file_options = get_nanopb_suboptions(fdesc, toplevel_options, Names([filename]))
      f = ProtoFile(fdesc, file_options)
+    f.optfilename = optfilename
+    
+    return f
+
+def process_file(filename, fdesc, options, other_files = {}):
+    '''Process a single file.
+    filename: The full path to the .proto or .pb source file, as string.
+    fdesc: The loaded FileDescriptorSet, or None to read from the input file.
+    options: Command line options as they come from OptionsParser.
+    
+    Returns a dict:
+        {'headername': Name of header file,
+         'headerdata': Data for the .h header file,
+         'sourcename': Name of the source code file,
+         'sourcedata': Data for the .c source code file
+        }
+    '''
+    f = parse_file(filename, fdesc, options)
+
+    # Provide dependencies if available
+    for dep in f.fdesc.dependency:
+        if dep in other_files:
+            f.add_dependency(other_files[dep])
  
      # Decide the file names
      noext = os.path.splitext(filename)[0]
@@ -1364,7 +1422,7 @@ def process_file(filename, fdesc, options):
      # List of .proto files that should not be included in the C header file
      # even if they are mentioned in the source .proto.
      excludes = ['nanopb.proto', 'google/protobuf/descriptor.proto'] + options.exclude
-    includes = [d for d in fdesc.dependency if d not in excludes]
+    includes = [d for d in f.fdesc.dependency if d not in excludes]
      
      headerdata = ''.join(f.generate_header(includes, headerbasename, options))
      sourcedata = ''.join(f.generate_source(headerbasename, options))
@@ -1372,7 +1430,7 @@ def process_file(filename, fdesc, options):
      # Check if there were any lines in .options that did not match a member
      unmatched = [n for n,o in Globals.separate_options if n not in Globals.matched_namemasks]
      if unmatched and not options.quiet:
-        sys.stderr.write("Following patterns in " + optfilename + " did not match any fields: "
+        sys.stderr.write("Following patterns in " + f.optfilename + " did not match any fields: "
                           + ', '.join(unmatched) + "\n")
          if not Globals.verbose_options:
              sys.stderr.write("Use  protoc --nanopb-out=-v:.   to see a list of the field names.\n")
@@ -1407,14 +1465,15 @@ def main_cli():
  def main_plugin():
      '''Main function when invoked as a protoc plugin.'''
  
-    import sys
+    import io, sys
      if sys.platform == "win32":
          import os, msvcrt
          # Set stdin and stdout to binary mode
          msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
          msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
      
-    data = sys.stdin.read()
+    data = io.open(sys.stdin.fileno(), "rb").read()
+
      request = plugin_pb2.CodeGeneratorRequest.FromString(data)
      
      try:
@@ -1432,10 +1491,22 @@ def main_plugin():
      
      response = plugin_pb2.CodeGeneratorResponse()
      
+    # Google's protoc does not currently indicate the full path of proto files.
+    # Instead always add the main file path to the search dirs, that works for
+    # the common case.
+    import os.path
+    options.options_path.append(os.path.dirname(request.file_to_generate[0]))
+    
+    # Process any include files first, in order to have them
+    # available as dependencies
+    other_files = {}
+    for fdesc in request.proto_file:
+        other_files[fdesc.name] = parse_file(fdesc.name, fdesc, options)
+    
      for filename in request.file_to_generate:
          for fdesc in request.proto_file:
              if fdesc.name == filename:
-                results = process_file(filename, fdesc, options)
+                results = process_file(filename, fdesc, options, other_files)
                  
                  f = response.file.add()
                  f.name = results['headername']
@@ -1445,7 +1516,7 @@ def main_plugin():
                  f.name = results['sourcename']
                  f.content = results['sourcedata']    
      
-    sys.stdout.write(response.SerializeToString())
+    io.open(sys.stdout.fileno(), "wb").write(response.SerializeToString())
  
  if __name__ == '__main__':
      # Check if we are running as a plugin under protoc