初学Python的时候写的呀!
简单的爬虫,简单的文本操作。
Scrapy
https://tieba.baidu.com/p/5127857460?fr=ala0&pstaala=2&tpl=5
https://tieba.baidu.com/p/5127857460?pn=1
其中 pn 参数为页码(第几页)。
<img class="BDE_Image" src="https://******.jpg" size="34875" width="440" height="440">
图片地址用正则表达式提取:取出 src=" 与 " size 之间的部分(即以 .jpg 结尾的图片链接)。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
|
import urllib.request
import re
def get_img(url):
    """Download every ``.jpg`` image referenced on the page at *url*.

    The page is fetched and decoded as UTF-8. Image addresses are extracted
    with a regular expression (the text between ``src="`` and ``" size``
    that ends in ``.jpg``), and each one is saved as ``pic/<n>.jpg`` with
    *n* counting from 1. A summary line with the number of downloaded
    images is printed at the end.

    Args:
        url: Address of the page to scan for images.
    """
    import os  # local import so the block does not depend on new file-level imports

    # Use the response as a context manager so the connection is closed
    # as soon as the body has been read.
    with urllib.request.urlopen(url) as response:
        # Decode to str: a str regex pattern cannot be applied to bytes
        # (TypeError: can't use a string pattern on a bytes-like object).
        html = response.read().decode('utf-8')

    img = re.compile(r'src="(.+?\.jpg)" size')
    # findall returns the captured group (the image address) of every match.
    imglist = img.findall(html)

    # The original path 'pic\%s.jpg' relied on a backslash, which is an
    # invalid escape sequence and only acts as a directory separator on
    # Windows. Build the path portably and make sure the directory exists.
    target_dir = 'pic'
    os.makedirs(target_dir, exist_ok=True)

    count = 0  # stays 0 when the page contains no matching image
    for count, imgurl in enumerate(imglist, start=1):
        # Save the image to the local disk as pic/<count>.jpg.
        urllib.request.urlretrieve(imgurl, os.path.join(target_dir, '%s.jpg' % count))
    print('下载%s张图片完成.' % count)
# Page address passed to get_img() when this script is run.
url = "https://tieba.baidu.com/p/5127857460?fr=ala0&pstaala=2&tpl=5"
get_img(url)
|
文本操作
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
|
def gl(path, context):
    """Mask the words listed in the file at *path* inside *context* and print it.

    The line "张三" is first appended to the word file. Then every line of
    the file is stripped of surrounding whitespace and each occurrence of
    that word in *context* is replaced with ``XX``. The masked text is
    printed to standard output.

    Args:
        path: Path of the word-list file, one word per line. Opening it in
            append mode creates the file if it does not exist yet.
        context: The text to filter.
    """
    # Append first so the word written here is part of the list read below.
    with open(path, 'a') as f:
        f.write("张三\n")

    with open(path, 'r') as f:
        for line in f:
            word = line.strip()
            # Skip blank lines: replacing the empty string would insert
            # 'XX' between every character of the text.
            if not word:
                continue
            context = context.replace(word, 'XX')
    print(context)
# Word-list file handed to gl().
path = 'C:/Users/john/workspace/PythonDemo4/src/words.txt'
# Read the sentence to filter from the console (the prompt text is Chinese).
context = input('输入一句话:')
gl(path,context)
|