关于linux：循环访问SQLite数据库时Python脚本被锁定

Python script is locked when accessing SQLite database in loop

请仔细阅读解析器的代码。它从访问循环的网页中获取一些统计信息，并将指定的记录放入SQLite3数据库中。

一切正常，直到第87行(SQL语句)为止，该行占用了所有CPU资源，并且实际上被阻塞了。

File"./parser.py", line 86, in
while (j < i):

代码开头的数据库文件是用正确的结构创建的，因此问题出在循环中。主循环for season in season_list:的内部块工作正常。这是我脚本的完整代码：

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100

#!/usr/bin/env python
from bs4 import BeautifulStoneSoup
from urllib2 import urlopen
import re
import sqlite3
from time import gmtime, strftime

# Print start time
print"We started at", strftime("%Y-%m-%d %H:%M:%S", gmtime())

# Create DB
print"Trying to create DB"
con = sqlite3.connect('england.db')
cur = con.cursor()
sql ="""\\
CREATE TABLE english_premier_league (
id_match INTEGER PRIMARY KEY AUTOINCREMENT,
season TEXT,
tour INTEGER,
date TEXT,
home TEXT,
visitor TEXT,
home_score INTEGER,
visitor_score INTEGER
);
"""
try:
cur.executescript(sql)
except sqlite3.DatabaseError as err:
print"Error creating database:", err
else:
print"Succesfully created your database..."
con.commit()
cur.close()
con.close()

# list of variables
postfix = 2011
threshold = 1999
season_list = []
while postfix >= threshold:
end = (postfix + 1) % 2000
if (end >= 10):
season = str(postfix) + str(end)
else:
season = str(postfix) + str(0) + str(end)
season_list.append(season)
postfix -= 1
print season_list

# main loop
for season in season_list:
href = 'http://www.stat-football.com/en/a/eng.php?b=10&d='+season+'&c=51'
print href
xml = urlopen(href).read()
xmlSoup = BeautifulStoneSoup(xml)
tablet = xmlSoup.find(attrs={"class" :"bd5"})

#Access DB
con = sqlite3.connect('england.db')
cur = con.cursor()

#Parse site
tour = tablet.findAll(attrs = {"class" : re.compile(r"^(s3|cc s3)$") })
date = tablet.findAll(text = re.compile(r"(0[1-9]|[12][0-9]|3[01])\\.(0[1-9]|1[012])\\.(19|20)\\d\\d"))
home = tablet.findAll(attrs = {"class" :"nw"})
guest = tablet.findAll(attrs = {"class" :"s1"})
score = tablet.findAll(attrs = {"class" :"nw pr15"})

#
def parse_string(sequence):
result=[]
for unit in sequence:
text = ''.join(unit.findAll(text=True))
result.append(text.strip())
return result

tour_list=parse_string(tour)
home_list=parse_string(home)
guest_list=parse_string(guest)
score_list=parse_string(score)

#Loop over found records to put them into sqlite3 DB
i = len(tour_list)
j = 0
while (j < i):
sql_add = 'INSERT INTO english_premier_league (season, tour, date, home, visitor, home_score, visitor_score) VALUES (?, ?, ?, ?, ?, ?, ?)'
match = (season, int(tour_list[j]), date[j], home_list[j], guest_list[j], int(score_list[j][0:1]), int(score_list[j][2:3]))
try:
cur.executemany(sql_add, match)
except sqlite3.DatabaseError as err:
print"Error matching the record:", err
else:
con.commit()
part = float(j)/float(i)*100
if (part%10 == 0):
print (int(part)),"%"
j += 1
cur.close()
con.close()

另外，查看strace输出的末尾可能会很有用：

getcwd("/home/vitaly/football_forecast/epl", 512) = 35
stat("/home/vitaly/football_forecast/epl/england.db",
{st_mode=S_IFREG|0644, st_size=24576, ...}) = 0
open("/home/vitaly/football_forecast/epl/england.db", O_RDWR|O_CREAT,
0644) = 3 fcntl(3, F_GETFD) = 0 fcntl(3,
F_SETFD, FD_CLOEXEC) = 0 fstat(3, {st_mode=S_IFREG|0644,
st_size=24576, ...}) = 0 lseek(3, 0, SEEK_SET) = 0
read(3,"SQLite format 3\\0\\4\\0\\1\\1\\0@ \\0\\0\\1~\\0\\0\\0\\30"..., 100) =
100

我正在Ubuntu 12.04上运行Python 2.7。非常感谢。