Changeset 854 for proto/charsetcompiler


Ignore:
Timestamp:
Dec 23, 2010, 3:59:54 PM (9 years ago)
Author:
ksherdy
Message:

Removed leading spaces on bitstream statements. Implemented regex range checker.

Location:
proto/charsetcompiler
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • proto/charsetcompiler/charset_compiler.py

    r716 r854  
    642642    def add_assignment(self, varname, expr):
    643643        self.common_expression_map[expr] = varname
    644         self.generated_code.append('  %s%s = %s;\n' % (self.typedecl, varname, expr))
     644        self.generated_code.append('%s%s = %s;\n' % (self.typedecl, varname, expr))
    645645    def expr_string_to_variable(self, expr_string):
    646646        if self.common_expression_map.has_key(expr_string):
  • proto/charsetcompiler/charset_input_parser.py

    r676 r854  
    77
    88debug = False
    9 
     9       
    1010def report_CharSetDef(charset_declaration_list):
    1111        """
     
    2020def split(statement):
    2121        """
    22         Splits a charset definitaion statement on the first occurence of '='
    23   and returns a two item token list.
     22        Splits a charset definition statement on the first occurrence of '='
     23        and returns a two item token list.
    2424        """
    2525        if len(statement)==0:
    2626            return
    2727       
    28         tokens_list = []
    29         start = 0
    30         equal_index = statement.find ("=", start)
    31         if equal_index > -1: #means that we found '='!
    32             tokens_list.append (statement[start:equal_index])
    33             start = equal_index + 1
     28        tokens_tuple = statement.partition('=')
     29        tokens_list = []
    3430
    35         #append the remaining string to as the last member of the list
    36         tokens_list.append (statement[start:])
     31        # don't append the delimiter ('=') to the list
     32        tokens_list.append(tokens_tuple[0])
     33        tokens_list.append(tokens_tuple[2])
    3734       
    3835        # trim spaces
     
    4138           
    4239        return tokens_list
     40       
    4341
    44 def checkValidDeclaration(statement):
     42def isValidDeclaration(statement):
    4543        """
    4644        Validates character set definition statement syntax as 'character class name = [character or range expression]'
    47        
    48   WARNING: This function does not check an invalid range. If there is a hypen at the beginning
    49                 or the end of the declaration, it will be considered as a hypen character
    50               Eg. [A-Z-] is considered as ['A-Z', '-']
     45        WARNING: This function does not validate the regular expression on the definition statement. The task will be done by genCharSetItems.
    5146        """
    5247        # split up the string to a list of tokens
     
    6358        return True
    6459   
    65 def genCharSetItems(token):
     60def genCharSetItems(token, items):
    6661        """
    6762        Generates a list of items from a given token (of type string).
    68         eg. input = "[A-Za-z_]"
     63        Returns true if the regular expression is valid.
     64        Eg. input = "[A-Za-z_]"
    6965            output = ['a-z', 'A-Z', '_']
     66        Example of invalid regular expression: [Z-A]
    7067        """
    71         items = []
    72         token_length = len(token)
     68        token_length = len(token) - 1
    7369       
    7470        # let's process the items and append into a list (items)
    7571        # check from index one to length-1 because we want to skip the square brackets
    7672        index = 1
    77         while index < token_length-1:
    78             # range case: we want the pattern of a-b where b is not -
    79             if index+2 < token_length and (token[index+1] == '-' and token[index+2] != '-'):
    80                 items.append(token[index:index+3])
    81                 index += 3
     73       
     74        while index < token_length:
     75
     76            # range case: we want the pattern of a-b where a is not '-' and a <= b
     77            if index+2 < token_length and token [index+1] == '-':
     78                   
     79                # allows range whose starting point is '-' if it is declared at the beginning of the list
     80                if (token [index] == '-' and index == 1) or token [index] != '-':
     81                   if isValidCharacterRange(token[index], token[index+2]):
     82                      items.append(token[index:index+3])
     83                      index += 3
     84                         
     85                   else:
     86                      print "Invalid range: " + token[index:index+3] + ", starting point is greater than ending point."
     87                      return False
     88                     
     89                else:
     90                   print "Invalid range: " + token[index:index+3] + ", starting point of a range cannot be '-' if it is not at the beginning of the list."
     91                   return False                               
     92
    8293            else:
    8394                items.append(token[index])
    8495                index += 1
    85         return items
     96
     97        return True
     98
     99def isValidCharacterRange(c1, c2):
     100        """
     101        Takes two characters: starting point, c1, and ending point, c2.
     102        Returns true if c1 <= c2, returns false otherwise.
     103        """
     104        return ord(c1) <= ord(c2)
    86105       
    87106def parseCharsetInput(string):
     
    92111        if len(string) == 0:
    93112            return
    94        
     113
    95114        # split up the string to a list of tokens
    96         if checkValidDeclaration(string):
     115        if isValidDeclaration(string):
    97116            tokens_list = split (string)
    98117            # get the items and store the (name, items) pair to the charset_declaration_list
    99             items = genCharSetItems(tokens_list[1])
    100             return (tokens_list[0], items)
     118            items = []
     119            if genCharSetItems(tokens_list[1], items):
     120               return (tokens_list[0], items)
     121            else:
     122               print "Invalid regular expression: " + string
     123               return ()
    101124        else:
    102             print "Invalid declaration: " + string
    103         return ()
     125            print "Invalid charset declaration: " + string
     126            return ()
     127
    104128
    105129def processCharsetInput(input_filename):
     
    118142        while (string != ""):
    119143                string = string.decode('string_escape')
    120                
    121                 # check if the last character a new line (\n) character
    122                 if string[-1] == '\n':
    123                         string = string [:-1]
    124                 if len(string) != 0:
    125                         # get the pair of name and items from the declared charsets
    126                         pair = parseCharsetInput(string)
    127                         if len(pair) == 2:
    128                                 charset_declaration_list.append(pair)
    129144
     145                # '#' indicates comment
     146                if string[0] != '#': 
     147                   # check if the last character a new line (\n) character
     148                   if string[-1] == '\n':
     149                      string = string [:-1]
     150                   if len(string) != 0:
     151                      # get the pair of name and items from the declared charsets
     152                      pair = parseCharsetInput(string)
     153                      if len(pair) == 2:
     154                         charset_declaration_list.append(pair)
     155                else:
     156                   #comment case
     157                   pass
     158                   
    130159                string = input_handle.readline()
    131160
Note: See TracChangeset for help on using the changeset viewer.