pyparsing recursion of values list (ibm rhapsody)
我正在为 IBM Rhapsody 的模型文件格式编写一个 pyparsing 解析器,但在递归解析嵌套的值列表时遇到了问题。下面是我目前的代码:
# First attempt at parsing an IBM Rhapsody-style model text with pyparsing.
import pprint

import pyparsing as pp

# Sample model text: an object `{ type ... }` holding `- name = value;`
# attributes, some with several `;`-separated values, multi-line quoted
# strings, and nested objects.
# NOTE(review): the line breaks and indentation inside TEST are reconstructed
# from the printed parse result (one attribute per line, newlines inside the
# two multi-line quoted strings) - confirm against the real model file.
TEST = r"""{ foo
    - key = bla;
    - value = 1243; 1233; 1235;
    - _hans = "hammer
    time";
    - HaMer = 765; 786; 890;
    - value = "
    #pragma LINK_INFO DERIVATIVE \"mc9s12xs256\"
    ";
    - _mText = 12.11.2015::13:20:0;
    - value = "war"; "fist";
    - _obacht = "fish,car,button";
    - _id = gibml c0d8-4535-898f-968362779e07;
    - bam = { boing
        - key = bla;
    }
    { boing
        - key = bla;
    }
}
"""


def flat(loc, toks):
    """Unwrap a group that holds exactly one quoted string.

    Used as a parse action on the grouped list of quoted strings. When the
    group has a single element, that element is returned so the result is a
    plain string; otherwise None is returned, which leaves the grouped
    tokens as they are.
    """
    if len(toks[0]) == 1:
        return toks[0][0]
    return None


# "- name =" prefix of an attribute; the dash and equals sign are dropped.
assignment = pp.Suppress("-") + pp.Word(pp.alphanums + "_") + pp.Suppress("=")

# One or more attributes. The value is tried, in order, as:
#   1. one or more quoted strings, each followed by ";"
#   2. a purely alphabetic word followed by ";"
#   3. any run of printable characters and spaces (this keeps the ";"
#      separators inside the resulting string)
value = pp.OneOrMore(
    pp.Group(
        assignment
        + (
            pp.Group(
                pp.OneOrMore(
                    pp.QuotedString('"', escChar="\\", multiline=True)
                    + pp.Suppress(";")
                )
            ).setParseAction(flat)
            | pp.Word(pp.alphas) + pp.Suppress(";")
            | pp.Word(pp.printables + " ")
        )
    )
)

expr = pp.Forward()

# NOTE: `expr` is rebound with `=` here instead of being filled in with
# `<<=`, so the `expr` references inside the alternatives still point at the
# empty Forward created above. Nested `{ ... }` objects are therefore not
# parsed recursively; the printed result shows `['bam', '{ boing']` as a
# flat string.
expr = (
    pp.Suppress("{")
    + pp.Word(pp.alphas)
    + (value | (assignment + expr) | expr)
    + pp.Suppress("}")
)
expr = expr.ignore(pp.pythonStyleComment)

print(TEST)
pprint.pprint(expr.parseString(TEST).asList())
输出:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 | % python prase.py { foo - key = bla; - value = 1243; 1233; 1235; - _hans ="hammer time"; - HaMer = 765; 786; 890; - value =" #pragma LINK_INFO DERIVATIVE \"mc9s12xs256\" "; - _mText = 12.11.2015::13:20:0; - value ="war";"fist"; - _obacht ="fish,car,button"; - _id = gibml c0d8-4535-898f-968362779e07; - bam = { boing - key = bla; } { boing - key = bla; } } ['foo', ['key', 'bla'], ['value', '1243; 1233; 1235;'], ['_hans', 'hammer\ time'], ['HaMer', '765; 786; 890;'], ['value', '\ #pragma LINK_INFO DERIVATIVE"mc9s12xs256"\ '], ['_mText', '12.11.2015::13:20:0;'], ['value', ['war', 'fist']], ['_obacht', 'fish,car,button'], ['_id', 'gibml c0d8-4535-898f-968362779e07;'], ['bam', '{ boing'], ['key', 'bla']] |
哇,这种模型格式真够凌乱的!我想下面的写法能让您接近目标。我首先尝试描述有效值表达式的特征:每个分组可以包含以';'结尾的属性定义,或者以'{}'包围的嵌套对象;每个对象都以一个给出对象类型的标识符开头。
棘手之处在于一个非常通用的记号,我把它命名为"value_word":它几乎可以是任意字符组合,只要不以"-"、"{"或"}"开头即可。在"value_word"的定义中使用否定前瞻(negative lookahead)就可以处理这一点。我认为这里的关键是:不能把空格' '作为"value_word"的有效字符,而应让 pyparsing 执行默认的空白跳过,并允许由一个或多个"value_word"组成一个"attr_value"。
最后一个麻烦之处(测试用例中没有,但出现在您链接的示例中)是下面这行属性赋值:
1 | - m_pParent = ; |
因此attr_value还必须允许一个空字符串。
# Recursive pyparsing grammar for the Rhapsody-style model text:
#   object    ::= "{" type (attribute | object)* "}"
#   attribute ::= "-" name "=" (object+ | [value (";" value)*] ";")
from pyparsing import (
    Combine,
    Forward,
    Group,
    OneOrMore,
    Optional,
    QuotedString,
    Suppress,
    Word,
    ZeroOrMore,
    alphanums,
    alphas,
    delimitedList,
    hexnums,
    oneOf,
    printables,
)

# Punctuation that structures the text but is not kept in the results.
LBRACE, RBRACE, SEMI, EQ, DASH = map(Suppress, "{};=-")

ident = Word(alphas + '_', alphanums + '_').setName("ident")

# 'GUID' followed by five dash-separated groups of hex digits.
guid = Group('GUID' + Combine(Word(hexnums) + ('-' + Word(hexnums)) * 4))

# Double-quoted string; may span lines and contain backslash-escaped quotes.
qs = QuotedString('"', escChar="\\", multiline=True)

# A single printable character (or a space) between single quotes.
character_literal = Combine("'" + oneOf(list(printables + ' ')) + "'")

# Catch-all token: any run of printable characters except ';'. The negative
# lookaheads stop it from swallowing the '-', '{' or '}' that starts the
# next attribute or object. A space is deliberately not a valid character:
# pyparsing's default whitespace skipping separates consecutive words.
value_word = (
    ~DASH + ~LBRACE + ~RBRACE
    + Word(printables, excludeChars=';').setName("value_word")
)

value_atom = guid | qs | character_literal | value_word

object_ = Forward()

# An attribute value is either one or more nested objects, or a possibly
# empty ';'-separated list of values closed by ';'. Several atoms in a row
# within one list item are grouped together. The list is Optional so that
# an empty assignment such as "- m_pParent = ;" is accepted.
attr_value = (
    OneOrMore(object_)
    | Optional(
        delimitedList(Group(value_atom + OneOrMore(value_atom)) | value_atom, ';')
    ) + SEMI
)
attr_value.setName("attr_value")

# "- name = value", with the parts exposed under the results names
# "name" and "value".
attr_defn = Group(DASH + ident("name") + EQ + Group(attr_value)("value"))
attr_defn.setName("attr_defn")

# `<<=` fills in the Forward declared above, so objects nest recursively.
object_ <<= Group(
    LBRACE
    + ident("type")
    + Group(ZeroOrMore(attr_defn | object_))("attributes")
    + RBRACE
)

# TEST is the sample model string defined in the question's script.
object_.parseString(TEST).pprint()
对于您的测试字符串,它给出:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 | [['foo', [['key', ['bla']], ['value', ['1243', '1233', '1235']], ['_hans', ['hammer\ time']], ['HaMer', ['765', '786', '890']], ['value', ['\ #pragma LINK_INFO DERIVATIVE"mc9s12xs256"\ ']], ['_mText', ['12.11.2015::13:20:0']], ['value', ['war', 'fist']], ['_obacht', ['fish,car,button']], ['_id', [['gibml', 'c0d8-4535-898f-968362779e07']]], ['bam', [['boing', [['key', ['bla']]]], ['boing', [['key', ['bla']]]]]]]]] |
我还添加了结果名称(results names),它们可能有助于处理这些结构。使用 `print(object_.parseString(TEST).dump())` 会得到如下输出:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 | [['foo', [['key', ['bla']], ['value', ['1243', '1233', '1235']], ['_hans', ['hammer\ time']], ... [0]: ['foo', [['key', ['bla']], ['value', ['1243', '1233', '1235']], ['_hans', ['hammer\ time']], ... - attributes: [['key', ['bla']], ['value', ['1243', '1233', '1235']], ['_hans', ['hammer... [0]: ['key', ['bla']] - name: key - value: ['bla'] [1]: ['value', ['1243', '1233', '1235']] - name: value - value: ['1243', '1233', '1235'] [2]: ['_hans', ['hammer\ time']] - name: _hans - value: ['hammer\ time'] [3]: ['HaMer', ['765', '786', '890']] - name: HaMer - value: ['765', '786', '890'] [4]: ['value', ['\ #pragma LINK_INFO DERIVATIVE"mc9s12xs256"\ ']] - name: value - value: ['\ #pragma LINK_INFO DERIVATIVE"mc9s12xs256"\ '] [5]: ['_mText', ['12.11.2015::13:20:0']] - name: _mText - value: ['12.11.2015::13:20:0'] [6]: ['value', ['war', 'fist']] - name: value - value: ['war', 'fist'] [7]: ['_obacht', ['fish,car,button']] - name: _obacht - value: ['fish,car,button'] [8]: ['_id', [['gibml', 'c0d8-4535-898f-968362779e07']]] - name: _id - value: [['gibml', 'c0d8-4535-898f-968362779e07']] [0]: ['gibml', 'c0d8-4535-898f-968362779e07'] [9]: ['bam', [['boing', [['key', ['bla']]]], ['boing', [['key', ['bla']]]]]] - name: bam - value: [['boing', [['key', ['bla']]]], ['boing', [['key', ['bla']]]]] [0]: ['boing', [['key', ['bla']]]] - attributes: [['key', ['bla']]] [0]: ['key', ['bla']] - name: key - value: ['bla'] - type: boing [1]: ['boing', [['key', ['bla']]]] - attributes: [['key', ['bla']]] [0]: ['key', ['bla']] - name: key - value: ['bla'] - type: boing - type: foo |
删除开头的版本行之后,它也能成功解析您链接的示例。