Changeset 662 for proto/charsetcompiler


Ignore:
Timestamp:
Oct 15, 2010, 3:27:42 PM (9 years ago)
Author:
ksherdy
Message:

Remove redundant function comments. Replace manual string whitespace trimming function with standard Python library implementation.

Location:
proto/charsetcompiler
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • proto/charsetcompiler/charset_input_parser.py

    r660 r662  
    11# charset_input_parser.py
     2#
     3# This library contains functions to parse line-delimited charset definitions
     4# of the form 'character class name = []' and produce (character class name, character class item list).
    25#
    3 #
    4 # This library can be used to parse a charset declaration and produce a pair of (name, items)
    5 # It contains the following functions:
    6 #
    7 # 1. report_CharSetDef(charset_declaration_list):
    8 #       Prints out name and items of each pair of charset in the given list.
    9 #
    10 # 2. split(statement)
    11 #       Splits up a statement (or line) and returns a list of at most 3 elements.
    12 #       The first token will be the first element of the list.
    13 #       The second token will be the second element of the list.
    14 #       The rest will be the third element of the list
    15 #
    16 # 3. checkValidDeclaration(statement, token_list)
    17 #       Checks if a given statement (of type string) contains a valid character set declaration.
    18 #       The expected declaration is:
    19 #           charsetname = [regular expression range]
    20 #
    21 # 5. genCharSetItems(token)
    22 #       Generates a list of items from a given token (of type string).
    23 #       eg. input = "[A-Za-z_]"
    24 #           output = ['a-z', 'A-Z', '_']
    25 #
    26 # 6. parseCharsetInput(string)
    27 #       Takes a line of charset declaration as an argument and generates a list of (name, items) pair.
    28 #       Returns an empty pair if the charset declared in the file is not valid.
    29 #
    30 # 7. processCharsetInput(input_filename):
    31 #       Takes input_filename as an argument and reads the file to generate the charset items and its name.
    32 #       Returns a list containing pairs of (name, items) to be passed to the CharSetDef class
    33 #       Expected input file:
    34 #               charsetname = [regular expression range]
    35 #               charsetname = [regular expression range]
    36 #       Note the spaces and new line. Invalid declarations will be ignored.
    37 #
    38 
    396
    407def report_CharSetDef(charset_declaration_list):
    418        """
    42         Prints out name and items of each pair of charset in the given list.
     9        Diagnostic function. Prints out the character class name and character class item list for each
     10  charset definition.
    4311        """
    4412        print "-----------CharSetDef-----------"
     
    4917def split(statement):
    5018        """
    51         Splits up a statement (or line) with '=' as the delimiter and returns a list of at most 2 elements.
    52         The first token will be the first element of the list.
    53         The rest will be the second element of the list.
     19        Splits a charset definition statement on the first occurrence of '='
     20  and returns a two item token list.
    5421        """
    5522        if len(statement)==0:
     
    6633        tokens_list.append (statement[start:])
    6734       
    68         #get rid of spaces at the beginning and the end of each token
     35        # trim spaces
    6936        for i in range(0,len(tokens_list)):
    70             tokens_list[i] = removeSpaces(tokens_list[i])
     37            tokens_list[i] = tokens_list[i].strip()
    7138           
    7239        return tokens_list
     
    7441def checkValidDeclaration(statement):
    7542        """
    76         Checks if a given statement (of type string) contains a valid character set declaration.
    77         The expected declaration is:
    78                 charsetname = [regular expression range]
    79         NOTE: This function does not check an invalid range. If there is a hyphen at the beginning
     43        Validates character set definition statement syntax as 'character class name = [character or range expression]'
     44       
     45  WARNING: This function does not check an invalid range. If there is a hyphen at the beginning
    8046                or the end of the declaration, it will be considered as a hyphen character
    8147              Eg. [A-Z-] is considered as ['A-Z', '-']
     
    11581                index += 1
    11682        return items
    117 
    118 def removeSpaces(string):
    119         """
    120         Return a string with removed spaces at the beginning and the end of string
    121         """
    122         start = 0
    123         end = len(string)-1
    124         while (string[start] == ' '):
    125                 start += 1
    126                
    127         while (string[end] == ' '):
    128                 end -= 1
    129         return string [start:end+1]
    13083       
    13184def parseCharsetInput(string):
  • proto/charsetcompiler/inputs/input_Parabix2

    r609 r662  
    77lex.Exclam = [!]
    88lex.QMark = [?]
    9 lex.Hyphen = [-]
     9lex.Hyphen =  [-]
    1010lex.Equals = [=]
    1111lex.SQuote = [']
Note: See TracChangeset for help on using the changeset viewer.