Python学习练习(3)

2017-06-10 · 约 325 字 · 预计阅读 1 分钟

初学Python的时候写的呀！简单的爬虫，简单的文本操作。

爬虫（urllib + 正则表达式）

示例帖子：https://tieba.baidu.com/p/5127857460?fr=ala0&pstaala=2&tpl=5 ；分页地址：https://tieba.baidu.com/p/5127857460?pn=1 （pn 为页数）。页面中的图片标签形如 <img class="BDE_Image" src="https://******.jpg" size="34875" width="440" height="440"> ，图片地址用正则表达式取出 src=" 和 size 中间的部分。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23


import os
import re
import urllib.request

def get_img(url):
    """Download every ``.jpg`` image referenced by the page at *url*.

    The page is fetched and decoded as UTF-8. Image addresses are the
    values of ``src="....jpg"`` attributes that are immediately followed by
    a ``size`` attribute. Each image is saved as ``pic/<n>.jpg`` (numbered
    from 1 in page order) relative to the current working directory, and a
    summary line with the number of images is printed.

    Args:
        url: Address of the page to scan; anything ``urllib.request.urlopen``
            accepts.
    """
    # Close the response deterministically instead of leaking the handle.
    with urllib.request.urlopen(url) as response:
        # Decode to str: a str regex pattern cannot be applied to bytes.
        html = response.read().decode('utf-8')

    # [^"] keeps each match inside a single attribute value, so an earlier
    # non-jpg src on the same line cannot be swallowed into the capture.
    img = re.compile(r'src="([^"]+?\.jpg)" size')
    imglist = img.findall(html)

    # Build the target path portably and make sure the folder exists; a
    # hard-coded backslash only works as a separator on Windows.
    out_dir = 'pic'
    os.makedirs(out_dir, exist_ok=True)

    for index, imgurl in enumerate(imglist, start=1):
        # Save the image locally under its running number.
        urllib.request.urlretrieve(imgurl, os.path.join(out_dir, '%s.jpg' % index))

    print('下载%s张图片完成.' % len(imglist))

# Tieba thread whose page is scanned for images.
url = "https://tieba.baidu.com/p/5127857460?fr=ala0&pstaala=2&tpl=5"
# Runs at module level: fetches the page and downloads its images right away.
get_img(url)

文本操作

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15


def gl(path, context):
    """Print *context* with every word listed in the file at *path* masked.

    The word 张三 is first appended to the file as a new line, so it is
    always part of the list. Each line of the file, stripped of surrounding
    whitespace, is then treated as one word, and every occurrence of it in
    *context* is replaced with ``XX``. Blank lines are ignored.

    The file is opened with the platform default encoding, as before.

    Args:
        path: Path of the word-list file; created if it does not exist.
        context: The sentence to filter.
    """
    # Appending on every call is the original behaviour and is kept as is.
    with open(path, 'a') as f:
        f.write("张三\n")

    with open(path, 'r') as f:
        for line in f:
            word = line.strip()
            if not word:
                # An empty pattern would make str.replace insert 'XX'
                # between every character of the sentence.
                continue
            context = context.replace(word, 'XX')

    print(context)

# Word-list file; gl appends a line to it and then reads one word per line.
path = 'C:/Users/john/workspace/PythonDemo4/src/words.txt'
# Sentence typed by the user at the prompt.
context = input('输入一句话：')
# Prints the sentence with every listed word replaced by 'XX'.
gl(path,context)