关于XML:使用python时如何在另一个函数中传递函数

How to pass a function within another function when using Python

以下是我正在使用的csv文件:

1
2
3
4
5
`"A","B","C","D","E","F","G","H","I","J"

"88",18,1,"<Req TID=""34"" ReqType=""MS""><IISO /><CID>2</CID><MemID>0000</MemID><MemPass /><RequestData><S>[REMOVED]</S><Na /><La /><Card>[REMOVED]</Card><Address /><HPhone /><Mail /></ReqData></Req>","<Response T=""3"" RequestType=""MS""><MS><Memb><PrivateMembers /><Ob>0-12-af</Ob><Locator /></Memb><S>[REMOVED]</S><CNum>[REMOVED]</CNum><FName /><LaName /><Address /><HPhone /><Email /><IISO /><MemID /><MemPass /><T /><CID /><T /></MS></Response>",0-JAN-10 12.00.02 AM,27-JUN-15 12.00.00 AM,"26",667,0
"22",22,1,"<Req TID=""45"" ReqType=""MS""><IISO /><CID>4</CID><MemID>0000</MemID><MemPass /><RequestData><S>[REMOVED]</S><Na /><La /><Card>[REMOVED]</Card><Address /><HPhone /><Mail /></ReqData></Req>","<Response T=""10"" RequestType=""MS""><MS><Memb><PrivateMembers /><Ob>0-12-af</Ob><Locator /></Memb><S>[REMOVED]</S><CNum>[REMOVED]</CNum><FName /><LaName /><Address /><HPhone /><Email /><IISO /><MemID /><MemPass /><T /><CID /><T /></MS></Response>",0-JAN-22 12.00.02 AM,27-JUN-22 12.00.00 AM,"26",667,0
"32",22,1,"<Req TID=""15"" ReqType=""MS""><IISO /><CID>45</CID><MemID>0000</MemID><MemPass /><RequestData><S>[REMOVED]</S><Na /><La /><Card>[REMOVED]</Card><Address /><HPhone /><Mail /></ReqData></Req>","<Response T=""10"" RequestType=""MS""><MS><Memb><PrivateMembers /><Ob>0-12-af</Ob><Locator /></Memb><S>[REMOVED]</S><CNum>[REMOVED]</CNum><FName /><LaName /><Address /><HPhone /><Email /><IISO /><MemID /><MemPass /><T /><CID /><T /></MS></Response>",0-JAN-20 12.00.02 AM,27-JUN-34 12.00.00 AM,"26",667,0`

下面的函数都带有注释。简而言之,函数get_clientresponses_2读取上述csv,选取E列的数据(XML数据);另有两个生成器函数解析**E列**中的XML数据,把XML标签及其文本转换为Python字典。具体来说,flatten_dict()函数返回由(key,value)对组成的可迭代序列,可以用list(flatten_dict(root))把它转换成列表。

到目前为止,它的输出是一个字典。然后,allocate_and_write_2收集这些数据并创建两个不同的集合。其中一个是用flatten_dict()返回的键来更新的set,这是为了确保XML中的元素标签(连同对应的值)出现在新写入的csv的表头中。这样写代码是为了保持表头的完整性(无重复项),并允许把新的元素标签加入表头(连同它们的值)。此外,已经存在的表头和值应该足够灵活,可以用新的实例更新(同样保持唯一)。所有其他行也都会被存储和更新。然后,我把表头转换成一个列表,并用列表推导式data把任何缺失的数据填为''。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import csv
from collections import OrderedDict
from xml.etree.ElementTree import ParseError
import collections
from __future__ import print_function

def get_clientresponses_2(filename = 's.csv'):
    """Read a CSV file and try to parse every value of its 'E' column as XML.

    Args:
        filename: path of the CSV file to read (defaults to 's.csv').

    Returns:
        None. Each successfully parsed root is only printed; the function has
        no ``return`` or ``yield``, so its result cannot be iterated.

    Raises:
        KeyError: from ``data['E']`` when no row contributed an 'E' key
            (for example, when the file has no data rows).
    """
    # NOTE(review): `ElementTree` and `ET` are used below, but the import
    # lines visible next to this function only bind `ParseError` from
    # xml.etree.ElementTree -- confirm both names are defined elsewhere.

    with open(filename, 'rU') as infile:
        reader = csv.DictReader(infile)         # read the file as a dictionary for each row ({header : value})
        # Regroup the rows column-wise: data maps header -> list of that
        # column's values, in row order.
        data = {}
        for row in reader:
            for header, value in row.items():
                try:
                    data[header].append(value)
                except KeyError:
                    # first value seen for this header: start its list
                    data[header] = [value]

        client_responses = data['E'] # list holding column E's value for every row
        for client_response in client_responses:
            # presumably client_response is already a str, in which case
            # joining with '' gives back the same text -- TODO confirm
            xml_string = (''.join(client_response))
            # remove every literal '&amp;' from the text before parsing
            xml_string = xml_string.replace('&amp;', '')
            try:
                root = ElementTree.XML(xml_string)
                print(root)
            except ET.ParseError:
                # report the unparsable value and move on to the next one
                print("catastrophic failure")
                continue

def allocate_and_write_2(get_clientresponses_2_gen):
    """Merge XML-derived data into CSV rows and write them to 'csv_output.csv'.

    Args:
        get_clientresponses_2_gen: object that is iterated once per CSV row;
            every item it produces is called with no arguments and the result
            is merged into the row with ``row.update()``. Passing a plain
            function object here makes the ``for`` loop raise
            "TypeError: 'function' object is not iterable".

    Returns:
        None. The merged rows are written to 'csv_output.csv'.
    """
    # NOTE(review): `filename` is neither a parameter nor assigned in this
    # function -- presumably a module-level name; confirm it exists.

    with open(filename, 'r') as infile:
        reader = csv.DictReader(infile)         # read the file as a dictionary for each row ({header : value})
        header = set()
        results = []
        #     data = {} # this is not needed for the purpose of this organization
        for row in reader:
            for get_clientresponses_2 in get_clientresponses_2_gen:
                xml_data = get_clientresponses_2()
                row.update(xml_data)        # just for XML data
                # NOTE(review): the same row object is appended once per
                # inner-loop iteration, not once per CSV row.
                results.append(row)         # everything else
                header.update(row.keys())  # can't forget headers

    #     print(row) # returns dictionary of key values pairs (headers : values)
    #     print(results) # returns list wrapper for dictionary
    #     print(headers) #returns set of all headers
        # A set has no defined order, so the column order of the output
        # follows whatever order list(header) happens to produce.
        headers_list = list(header)
    #     print(headers_list) #list form of set

        with open('csv_output.csv', 'wt') as f:
            writer = csv.writer(f)
            writer.writerow(headers_list)
            for row in results:
                # emit '' for any header this particular row lacks
                data = [row.get(x, '') for x in headers_list]
                writer.writerow(data)
    #             writer.writerows(zip(headers_list, data))

输出如下:

1
2
3
4
C,HPhone,Locator,IISO,E,S,FName,LaName,J,D,MemID,ResponseRequestType,T,Email,I,Ob,G,MemPass,Address,A,PrivateMembers,H,CNum,ResponseT,CID,B,F
1,,,,"<Response T=""3"" RequestType=""MS""><MS><Memb><PrivateMembers /><Ob>0-12-af</Ob><Locator /></Memb><S>[REMOVED]</S><CNum>[REMOVED]</CNum><FName /><LaName /><Address /><HPhone /><Email /><IISO /><MemID /><MemPass /><T /><CID /><T /></MS></Response>",[REMOVED],,,0,"<Req TID=""34"" ReqType=""MS""><IISO /><CID>2</CID><MemID>0000</MemID><MemPass /><RequestData><S>[REMOVED]</S><Na /><La /><Card>[REMOVED]</Card><Address /><HPhone /><Mail /></ReqData></Req>",,MS,,,667,0-12-af,27-JUN-15 12.00.00 AM,,,88,,26,[REMOVED],10,,18,0-JAN-10 12.00.02 AM
1,,,,"<Response T=""10"" RequestType=""MS""><MS><Memb><PrivateMembers /><Ob>0-12-af</Ob><Locator /></Memb><S>[REMOVED]</S><CNum>[REMOVED]</CNum><FName /><LaName /><Address /><HPhone /><Email /><IISO /><MemID /><MemPass /><T /><CID /><T /></MS></Response>",[REMOVED],,,0,"<Req TID=""45"" ReqType=""MS""><IISO /><CID>4</CID><MemID>0000</MemID><MemPass /><RequestData><S>[REMOVED]</S><Na /><La /><Card>[REMOVED]</Card><Address /><HPhone /><Mail /></ReqData></Req>",,MS,,,667,0-12-af,27-JUN-22 12.00.00 AM,,,22,,26,[REMOVED],10,,22,0-JAN-22 12.00.02 AM
1,,,,"<Response T=""10"" RequestType=""MS""><MS><Memb><PrivateMembers /><Ob>0-12-af</Ob><Locator /></Memb><S>[REMOVED]</S><CNum>[REMOVED]</CNum><FName /><LaName /><Address /><HPhone /><Email /><IISO /><MemID /><MemPass /><T /><CID /><T /></MS></Response>",[REMOVED],,,0,"<Req TID=""15"" ReqType=""MS""><IISO /><CID>45</CID><MemID>0000</MemID><MemPass /><RequestData><S>[REMOVED]</S><Na /><La /><Card>[REMOVED]</Card><Address /><HPhone /><Mail /></ReqData></Req>",,MS,,,667,0-12-af,27-JUN-34 12.00.00 AM,,,32,,26,[REMOVED],10,,22,0-JAN-20 12.00.02 AM

但是,当我试图在allocate_and_write_2内调用get_clientresponses_2时,收到以下错误:

1
2
3
4
5
6
7
8
<ipython-input-91-cfd866a1c0b6> in allocate_and_write_2(get_clientresponses_2_gen)
     37         #     data = {} # this is not needed for the purpose of this organization
     38         for row in reader:
---> 39             for get_clientresponses_2 in get_clientresponses_2_gen:
     40                 xml_data = get_clientresponses_2()
     41                 row.update(xml_data)        # just for XML data

TypeError: 'function' object is not iterable

根据我对生成器的理解以及这个论坛上的其他帖子,我知道这个错误与我迭代生成器的方式有关。我想把第一个函数(get_clientresponses_2)的输出传给另一个函数,并在其中迭代这个生成器的输出。希望得到指导和反馈,特别是如何最好地纠正这个问题。


感谢@anandskumar的指导:

这确实是由于我在生成器函数的上下文中迭代的方式不对。我按照Anand的建议修改了原来的脚本:

1
2
# Call get_clientresponses_2() and iterate over its result, binding each
# item to xml_data.
for xml_data in get_clientresponses_2():
     # Build a dict from the (key, value) pairs that flatten_dict() produces.
     xml_dat =  dict(flatten_dict(xml_data))

但是,我还必须通过返回每个XML树的根并将其传递给allocate_and_write(),来修改get_clientresponses_2

为了防止任何副作用,我把它们保留为两个相互独立的函数。

您只需在下面的入口代码中调用:

1
2
# Call main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ =="__main__":
    main()

我这样做的原因在这里有详细说明

以下是这两个函数的完整代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import csv
from collections import OrderedDict
from xml.etree.ElementTree import ParseError
import collections
from __future__ import print_function


def get_clientresponses_2(filename='s.csv'):
    """Yield the parsed XML root of each row's 'E' column in a CSV file.

    This is a generator: iterate over ``get_clientresponses_2(filename)`` to
    receive one ``xml.etree.ElementTree.Element`` per row whose 'E' value is
    well-formed XML. Rows whose value is missing or cannot be parsed are
    reported with a printed message and skipped.

    Args:
        filename: path of the CSV file to read (defaults to 's.csv').

    Yields:
        The root Element parsed from each row's 'E' value, in row order.

    Raises:
        KeyError: if the CSV has data rows but no 'E' column.
    """
    # Imported locally so this function does not depend on a module-level
    # binding for ElementTree (the file only imports ParseError).
    from xml.etree import ElementTree

    # Plain 'r' instead of 'rU': the 'U' flag is rejected by Python 3.11+,
    # and text mode already applies universal newlines on Python 3.
    with open(filename, 'r') as infile:
        reader = csv.DictReader(infile)  # one dict per row ({header: value})
        for row in reader:
            client_response = row['E']
            if client_response is None:
                # DictReader fills columns missing from a short row with None
                print("catastrophic failure")
                continue
            # remove every literal '&amp;' from the text before parsing
            xml_string = client_response.replace('&amp;', '')
            try:
                root = ElementTree.XML(xml_string)
            except ElementTree.ParseError:
                print("catastrophic failure")
                continue
            # yield (not return) so callers receive every row's tree,
            # not just the first one
            yield root

def allocate_and_write_2(filename='s.csv'):
    """Merge each CSV row with its flattened XML data and write the result.

    Every row of ``filename`` is paired with the next XML root produced by
    ``get_clientresponses_2(filename)``. The (key, value) pairs that
    ``flatten_dict()`` produces for that root are merged into the row, and
    all merged rows are written to 'csv_output.csv' under a header holding
    every key seen, in first-seen order.

    Args:
        filename: path of the input CSV file (defaults to 's.csv', the same
            default get_clientresponses_2 uses).

    Returns:
        None. The output is the file 'csv_output.csv'.
    """
    header = []       # column names in first-seen order (deterministic output)
    seen = set()      # fast membership test for names already in header
    results = []

    with open(filename, 'r') as infile:
        reader = csv.DictReader(infile)  # one dict per row ({header: value})
        # Pair row N with XML root N, rather than merging every root into
        # every row and appending the same row repeatedly.
        # NOTE(review): get_clientresponses_2 is expected to skip rows whose
        # XML does not parse, so this pairing assumes every row parses;
        # confirm that holds for the input data.
        for row, xml_data in zip(reader, get_clientresponses_2(filename)):
            row.update(dict(flatten_dict(xml_data)))  # add the XML-derived columns
            results.append(row)
            for key in row:
                if key not in seen:
                    seen.add(key)
                    header.append(key)

    # Write once, after all rows are collected (not once per input row).
    with open('csv_output.csv', 'wt') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        for row in results:
            # emit '' for any column this particular row lacks
            writer.writerow([row.get(x, '') for x in header])

根据注释,get_clientresponses_2_gen似乎是一个生成器函数;如果是这样,那么下面这种写法并不是迭代生成器函数的正确方式:

1
2
# Problematic pattern: this iterates over get_clientresponses_2_gen itself.
# When that name is bound to a plain function object, the loop raises
# "TypeError: 'function' object is not iterable".
for get_clientresponses_2 in get_clientresponses_2_gen:
    xml_data = get_clientresponses_2()

要遍历生成器函数并把每个值放入xml_data中,请使用:

1
for xml_data in get_clientresponses_2_gen():

(将此替换为上面给出的两行)。