python
定制开发小程序每天进步亿点点…
1. 定制开发小程序新浪搜索接口爬取
- 使用知识document.charset 定制开发小程序控制台检查页面编码类型
- res.encoding = 'utf8' 解码
- requests.get请求
import requests

# Fetch Sina news search results for a keyword.
url = 'https://search.sina.com.cn/?'
# Search keyword
key = '0000'
# Request headers (UA copied from a desktop Chrome)
header = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4449.0 Safari/537.36'
}
params = {
    'q': key,
    'c': 'news',
    'from': 'inde'
}
# BUG FIX: query-string arguments for a GET request must go through
# `params=`; the original passed `data=`, which puts them in the request
# body where the server never sees them.
res = requests.get(url, headers=header, params=params)
# Charset was confirmed via document.charset in the browser console.
res.encoding = 'utf8'
print(res.text)
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
2. 定制开发小程序访问百度更多页面
res.status_code 返回请求状态码
res.headers 返回请求头
res.content 以二进制的形式返回
res.content.decode('utf8') 对返回的二进制内容进行解析
# Step 1: import the HTTP client
import requests

# Step 2: the exact page to fetch
url = 'https://www.baidu.com/more/'

# Step 3: issue the GET request
res = requests.get(url)

# Inspect the response
print(res.status_code)  # HTTP status code
print(res.headers)      # response headers

# Declare the text encoding so res.text decodes without mojibake
res.encoding = 'utf8'
# res.text            -> page source as text (needs the encoding above)
# res.content         -> raw bytes
# res.content.decode('utf8') -> bytes decoded manually
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
3. 百度贴吧爬取前n页
import requests, os

# Crawl the first 10 result pages of a Baidu Tieba forum and save each
# page's HTML to disk.
base_url = 'https://tieba.baidu.com/f?'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36',
}

# Output directory; created on first run
dirname = './tieba/woman/'
if not os.path.exists(dirname):
    os.makedirs(dirname)

# Search keyword; `pn` advances by 50 per page (pattern found by
# inspecting the site's requests)
key = 'JavaScript'
for page in range(10):
    query = {
        'ie': 'utf-8',
        'kw': key,
        'pn': str(page * 50),
    }
    response = requests.get(base_url, headers=headers, params=query)
    target = dirname + '美女第%s页.html' % (page + 1)
    with open(target, 'w', encoding='utf-8') as file:
        file.write(response.content.decode('utf-8'))
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
4. 使用百度翻译接口
import requests

# Query Baidu Translate's suggestion endpoint for a word.
url = 'https://fanyi.baidu.com/sug'
# Text to translate, read interactively
word = input('请输入翻译的内容:')
data = {
    'kw': word
}
# BUG FIX: do not hand-set Content-Length — str(len(word)) is wrong as
# soon as the body is URL-encoded (multi-byte or special characters);
# requests computes the correct value automatically.
header = {
    'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'X-Requested-With': 'XMLHttpRequest'  # marks the request as AJAX
}
# POST the form and decode the JSON reply
res = requests.post(url, data=data, headers=header)
# 'data' is a list of {'k': source word, 'v': translations}
rescon = res.json()['data']
for i in rescon:
    print(i['k'] + '' + i['v'])
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
5. 爬取有道翻译
import requests
import time
import hashlib
import random
import json


class translation:
    """Interactive client for the youdao web translation endpoint."""

    def __init__(self):
        self.url = 'http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'
        # Text to translate, read from the console
        self.key = input('请输入要翻译的内容:')

    def fanyi(self):
        # Content-Length bookkeeping: a CJK character counts for 9, an
        # ASCII one for 1; UTF-8 encodes a CJK character as 3 bytes,
        # which is where the //2 arithmetic comes from.
        if len(self.key) <= len(self.key.encode('utf8')):
            wide = (len(self.key.encode('utf8')) - len(self.key)) // 2
            narrow = len(self.key) - wide
            keylen = 234 + narrow + wide * 9
            print('当前输入的内容的长度为:%s' % keylen)
        # Millisecond timestamp; salt is the timestamp plus a random 0-10
        ts = str(int(time.time()) * 1000)
        lts = str(int(time.time()) * 1000) + str(random.randint(0, 10))
        # sign = md5("fanyideskweb" + text + salt + client secret)
        raw = 'fanyideskweb' + self.key + lts + 'Tbh5E8=q6U3EXe+&L[4c@'
        sign = hashlib.md5(raw.encode('utf8')).hexdigest()
        header = {
            'Content-Length': str(keylen),
            'Cookie': 'OUTFOX_SEARCH_USER_ID=-368708839@10.108.160.18; JSESSIONID=aaaL2DMAbpTgg8Qpc2xUw; OUTFOX_SEARCH_USER_ID_NCOO=1451460344.418452; ___rl__test__cookies=1561684330987',
            'Referer': 'http://fanyi.youdao.com/',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OSX10_14_2) AppleWebKit/537.36(KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
        }
        data = {
            'i': self.key,
            'from': 'AUTO',
            'to': 'AUTO',
            'smartresult': 'dict',
            'client': 'fanyideskweb',
            'salt': lts,    # timestamp + random digit
            'sign': sign,   # md5 signature computed above
            'lts': ts,      # plain millisecond timestamp
            'bv': '75b5d8bae54495d5ccd243908d1f65d4',
            'doctype': 'json',
            'version': '2.1',
            'keyfrom': 'fanyi.web',
            'action': 'FY_BY_REALTlME'
        }
        raw_reply = requests.post(self.url, headers=header, data=data).text
        reply = json.loads(raw_reply)
        results = reply['translateResult'][0][0]['tgt']
        print(self.key + '的翻译结果为:%s' % results)


if __name__ == '__main__':
    translation().fanyi()
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
import hashlib
import random
import time
import json

"""Send form data to the youdao translation endpoint and print the result."""


class Youdao:
    def __init__(self, msg):
        self.msg = msg
        self.url = 'http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'
        self.D = "Tbh5E8=q6U3EXe+&L[4c@"
        self.salt = self.get_salt()
        self.sign = self.get_sign()
        self.ts = self.get_ts()

    def get_md(self, value):
        """Return the hex MD5 digest of `value`."""
        digest = hashlib.md5()
        digest.update(value.encode('utf-8'))
        return digest.hexdigest()

    def get_salt(self):
        """Millisecond timestamp with one random digit (0-10) appended."""
        return str(int(time.time() * 1000)) + str(random.randint(0, 10))

    def get_sign(self):
        """MD5 of client id + message + salt + secret key."""
        return self.get_md("fanyideskweb" + self.msg + self.salt + self.D)

    def get_ts(self):
        """Current millisecond timestamp as a string."""
        return str(int(time.time() * 1000))

    def get_result(self):
        """POST the signed form and print the translation (or raw error)."""
        import requests  # local import so the module loads without requests installed
        Form_Data = {
            'i': self.msg,
            'from': 'AUTO',
            'to': 'AUTO',
            'smartresult': 'dict',
            'client': 'fanyideskweb',
            'salt': self.salt,
            'sign': self.sign,
            'ts': self.ts,
            'bv': 'c6b8c998b2cbaa29bd94afc223bc106c',
            'doctype': 'json',
            'version': '2.1',
            'keyfrom': 'fanyi.web',
            'action': 'FY_BY_CLICKBUTTION'
        }
        headers = {
            'Cookie': 'OUTFOX_SEARCH_USER_ID=-368708839@10.108.160.18; JSESSIONID=aaaL2DMAbpTgg8Qpc2xUw; OUTFOX_SEARCH_USER_ID_NCOO=1451460344.418452; ___rl__test__cookies=1561684330987',
            'Referer': 'http://fanyi.youdao.com/',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OSX10_14_2) AppleWebKit/537.36(KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
        }
        response = requests.post(self.url, data=Form_Data, headers=headers).text
        translate_results = json.loads(response)
        # The payload carries 'translateResult' on success
        if 'translateResult' in translate_results:
            translate_results = translate_results['translateResult'][0][0]['tgt']
            print("翻译的结果是:%s" % translate_results)
        else:
            print(translate_results)


if __name__ == "__main__":
    y = Youdao('我成功啦')
    y.get_result()
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
6. 起点中文网页面数据分析
import requests
import os
import json
from lxml import etree
import pandas as pd
import time
# Font-deobfuscation helper
from fontTools.ttLib import TTFont

'''Crawl novel listings from qidian.com and collect them for analysis.'''

# One list per field, appended to across all crawled pages.
results = {
    'title': [],       # novel title
    'author': [],      # author name
    'classifies': [],  # category
    'describes': []    # short description
}


class novel:
    def __init__(self, value):
        self.header = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4464.5 Safari/537.36',
        }
        self.page = value
        self.url = 'https://www.qidian.com/rank/recom?dateType=1&page={}'.format(self.page)
        # Crawling starts immediately on construction
        self.foreach(value)

    # Fetch the raw page HTML.
    def dataAnalysis(self):
        res = requests.get(self.url, headers=self.header).text
        return res

    # Parse the page; each returned <li> node is one novel entry.
    def etreehtml(self):
        lists = etree.HTML(self.dataAnalysis())
        content = lists.xpath('//*[@id="rank-view-list"]/div/ul/li')
        return content

    # Dump the obfuscated font to XML and print its cmap mapping
    # (helper for decoding the site's encrypted numerals).
    def decodeTtf(self):
        font = TTFont('UdygXvZa.ttf')
        font.saveXML('fft.xml')
        font_cmap = font['cmap'].getBestCmap()
        print(font_cmap)

    # Walk each list item and append its fields to `results`.
    # BUG FIX: the original chained .replace('', '') calls, which are
    # no-ops — surrounding whitespace/newlines are now actually removed.
    def foreach(self, value):
        for i in self.etreehtml():
            title = i.xpath('./div[2]/h4/a/text()')[0].strip()
            results['title'].append(title)
            author = i.xpath('./div[2]/p[1]/a[1]/text()')[0].strip()
            results['author'].append(author)
            classifies = i.xpath('./div[2]/p[1]/a[2]/text()')[0].strip()
            results['classifies'].append(classifies)
            describes = i.xpath('./div[2]/p[2]/text()')[0].replace(" ", '').replace('\n', '').replace('\t', '').replace('\r', '')
            results['describes'].append(describes)
        print('第%s个页面已爬取完成------' % value)

    def buildcsv(self):
        # Write the accumulated columns to CSV. Open with ANSI encoding
        # in Excel on Windows if the Chinese text renders garbled.
        df = pd.DataFrame(results)
        df.to_csv('qidian1.csv', encoding='utf8')
        print('------表格数据创建完成!!!')


if __name__ == '__main__':
    # Crawl pages 1-5 with a polite delay; the CSV is rewritten with the
    # cumulative data after each page.
    for a in range(1, 6):
        p = novel(str(a))
        time.sleep(1.5)
        print("------开始创建表格信息")
        p.buildcsv()
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 79
- 80
- 81
- 82
- 83
- 84
- 85
- 86
- 87
- 88
- 89
- 90
- 91
- 92
- 93
- 94
- 95
- 96
- 97
- 98
- 99
- 100
7. 将excel文件中的sheet分离并以csv格式输出
# Required modules
import xlrd
import pandas as pd
import os

'''Split every sheet of an Excel workbook into its own CSV file.'''


def excel2csv(excel_file):
    """Save each sheet of `excel_file` as <sheet>.csv in a ./csv/ folder
    located next to the workbook.

    :param excel_file: path of the .xlsx workbook to split
    """
    # Open the workbook only to enumerate its sheet names
    workbook = xlrd.open_workbook(excel_file)
    sheet_names = workbook.sheet_names()
    # IMPROVEMENT: compute the output directory once (it is loop-invariant)
    # and build paths with os.path.join instead of '\\' concatenation.
    dir_path = os.path.abspath(os.path.dirname(excel_file))
    csv_path = os.path.join(dir_path, 'csv')
    if not os.path.exists(csv_path):
        os.mkdir(csv_path)
    for worksheet_name in sheet_names:
        # Read one sheet with pandas and write it out as UTF-8 CSV
        data_xls = pd.read_excel(excel_file, worksheet_name, index_col=None)
        data_xls.to_csv(os.path.join(csv_path, worksheet_name + '.csv'),
                        index=None, encoding='utf-8')


excel2csv(r'D:\应用软件\autojs\Android\python\项目\xlsx转换为csv文件格式\1.xlsx')
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
8. 批量修改文件名
#!/usr/bin/python
# -*- coding:utf-8 -*-
import os
import random


def reanme():
    """Give every .jpg under ./img a randomized name, keeping a 2-char
    slice of the original stem plus the extension."""
    folder = os.getcwd() + '/img'
    for name in os.listdir(folder):
        if not name.endswith('.jpg'):
            continue
        old_path = os.path.join(os.path.abspath(folder), name)
        # Keep characters [1:3] of the stem and the original extension
        kept = name.split('.')[0][1:3] + "." + name.split('.')[1]
        prefix = str(random.random()).replace(".", "")[0:8]
        new_path = os.path.join(os.path.abspath(folder), f'_{prefix}_{kept}')
        os.rename(old_path, new_path)  # overwrite the old name


reanme()
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
9. 批量修改多个文件夹中文件的名称
- 指定要修改的文件后缀,如果有多个文件需要更改加条件,如果不想改变文件的后缀,需要切割文件名然后拼接
#!/usr/bin/python
# -*- coding:utf-8 -*-
import os


def multfolderRename():
    """Rename every .jpg inside each subfolder of ./photo1 to
    <folder>_<index>.jpg.

    Files with other extensions are skipped; extend the endswith check
    if more types should be renamed (split and rejoin the name to keep
    the extension unchanged).
    """
    outer_path = os.getcwd() + '\\photo1'  # parent folder to process
    folder_list = os.listdir(outer_path)   # its subfolders
    for folder in folder_list:
        inner_path = os.path.join(outer_path, folder)
        file_list = os.listdir(inner_path)  # images in this subfolder
        for index, item in enumerate(file_list):
            if item.endswith('.jpg'):
                # abspath gives the absolute source path
                src = os.path.join(os.path.abspath(inner_path), item)
                dst = os.path.join(os.path.abspath(inner_path),
                                   str(folder) + '_' + str(index) + '.jpg')
                # BUG FIX: catch only OS-level rename failures (e.g. the
                # target already exists) instead of a bare `except:` that
                # also swallows KeyboardInterrupt and programming errors.
                try:
                    os.rename(src, dst)
                except OSError:
                    continue
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
10. 将excel文件转换成json数据
import xlrd, json, codecs
from collections import OrderedDict

# Workbook to convert
wb = xlrd.open_workbook('code.xlsx')
sh = wb.sheet_by_index(0)

# First row holds the attribute (column) names
title = sh.row_values(0)

# Build one ordered dict per data row
convert_list = []
for row_num in range(1, sh.nrows):
    row_v = sh.row_values(row_num)  # one full row of cell values
    record = OrderedDict()          # keep the column order stable
    for column, cell in enumerate(row_v):
        record[title[column]] = cell
    convert_list.append(record)

# Dump everything as pretty-printed UTF-8 JSON
with codecs.open('file.json', "w", "utf-8") as f:
    f.write(json.dumps(convert_list, ensure_ascii=False, indent=2) + '')
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
11. 将一个excel文件中的多个sheet分离出来并且以csv格式保存
# Required modules
import xlrd, os, glob
import pandas as pd

'''Split every sheet of an Excel workbook into CSV files, then merge them.'''


class csvv:
    def __init__(self, excel_file):
        # Glob pattern matching the generated CSV files
        self.path = os.getcwd() + '/csv/*.csv'
        self.excel2csv(excel_file)

    def excel2csv(self, excel_file):
        """Write each sheet of `excel_file` to ./csv/<sheet>.csv,
        then merge all CSVs into one file."""
        workbook = xlrd.open_workbook(excel_file)
        sheet_names = workbook.sheet_names()
        for worksheet_name in sheet_names:
            # Read one sheet with pandas and write it out as UTF-8 CSV
            data_xls = pd.read_excel(excel_file, worksheet_name, index_col=None)
            dir_path = os.path.abspath(os.path.dirname(excel_file))
            csv_path = dir_path + '\\csv\\'
            if not os.path.exists(csv_path):
                os.mkdir(csv_path)
            data_xls.to_csv(csv_path + worksheet_name + '.csv', index=None, encoding='utf-8')
        self.hebin()

    def hebin(self):
        """Concatenate all the generated CSV files into all.csv."""
        csv_list = glob.glob(os.getcwd() + '/csv/*.csv')
        print(u'共发现%s个CSV文件' % len(csv_list))
        print(u'正在处理')
        for i in csv_list:
            # BUG FIX: the original leaked the read handle via
            # open(i, 'rb').read(); both files now use context managers.
            with open(i, 'rb') as src:
                data = src.read()
            with open('all.csv', 'ab') as f:
                f.write(data)
        print(u'合并完毕!')


if __name__ == '__main__':
    # Run the split/merge only once — skip if ./csv already exists
    dirr = os.getcwd() + '\\csv'
    if not os.path.exists(dirr):
        csvv(os.getcwd() + '\\all.xlsx')
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
12. 将复杂的表格转换成json格式的文件
# -*- coding:utf-8 -*-
import pandas as pd
import collections, json

# Source table; switch encoding to 'gb18030' if the CSV is not UTF-8.
area_data = pd.read_csv('all.csv', encoding='utf8', sep=',')
print(area_data.head())

# Columns 1..4 hold the answer options shared by every question.
indicator = area_data.columns[1:5].tolist()
print(indicator)

# BUG FIX: use a context manager instead of a bare open()/close() pair,
# and separate records with newlines — the original wrote the JSON
# objects back-to-back with no delimiter, producing unparseable output.
with open("all.json", "w+", encoding='utf-8') as f:
    for i in range(len(area_data)):
        area_dict = collections.OrderedDict()  # keep key order stable
        area_dict['question'] = str(area_data.loc[i, 'question'])
        area_dict['answer'] = str(area_data.loc[i, 'answer'])
        area_dict['options'] = indicator
        # Stringify the option values for this row
        value_list_new = [str(x) for x in area_data.iloc[i, 1:5].tolist()]
        area_dict['values'] = value_list_new
        f.write(json.dumps(area_dict, ensure_ascii=False) + '\n')
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
13. 使用python发送qq邮箱
# Send rich (HTML) mail through QQ's mail service
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText


def postQQ(text, QQ_Code):
    """
    Send an HTML reminder mail via QQ's SMTP server.
    :param text: body text inserted into the HTML template
    :param QQ_Code: recipient address (full form, e.g. 123@qq.com)
    :return: None
    """
    msg_from = ''  # sender address
    passwd = ''    # SMTP authorization code (not the login password)
    to = QQ_Code   # recipient
    msg = MIMEMultipart()  # container; MIMEMultipart can carry any part
    # HTML template with `text` spliced into the message box
    content = ''' <head><style>::-webkit-scrollbar {width: 6px;height: 5px;}::-webkit-scrollbar-track {background-color: rgba(50, 57, 61, 0.2);border-radius: 2em;}::-webkit-scrollbar-thumb {background-color: #202b33;background-image: -webkit-linear-gradient(45deg, hsla(0, 0%, 100%, 0.4) 25%, transparent 0, transparent 50%, hsla(0, 0%, 100%, .4) 0, hsla(0, 0%, 100%, .4) 75%, transparent 0, transparent);border-radius: 2em;}</style></head><body><div class="email" style="width: 340px; height: 400px; background-color: #cce2dd; margin-top: 50px; margin-left: auto; margin-right: auto;border-radius: 16px; box-shadow: 1px 2px 5px rgb(0,0,0,0.3);position: relative; overflow: hidden;"><img src="https://cdn.jsdelivr.net/gh/Rr210/image@master/hexo/4/0072Vf1pgy1foxlhi4bpsj31kw0w0qs8.jpg" alt="" style="display: block; width: 100%;"><h3 style="background:hsla(249, 13%, 20%, 0.659); border-radius: 10px;width: 80%;height: 40px; line-height: 40px; text-align: center;font-size: 16px; position: absolute;top: 88px;left: 34px;color: #e7dfee;"> 别慌别慌~~这只是一个xx提醒!!</h3><h4 style="position:absolute;top: 45px;right:12px;height: 30px; color: #1f3834;">————来自【xxxx】的提醒:</h4><div readonly="readonly" style="margin:20px auto 0; display: flex; justify-content: center; align-items: center; border-radius:10px; outline:none; padding: 10px; background-color: hsla(220, 12%, 65%, 0.478);resize:none;max-width: 300px;height: 100px;max-height: 100px; box-shadow: 0 0 10px #352c2c3b;border: 1px solid #a0b3d6; font-size: 12px; overflow-wrap: break-word;-webkit-user-modify: read-only">''' + text + '''</div><div style="font-size: 12px;margin:20px 0 0;display: flex; justify-content: center; align-items: center; text-align: center;color:#200f0f;"> <div>©2021 by</div><a style="text-decoration:none; color:#7c4a0d; margin-left: 5px;" href="https://u.mr90.top">Harry</a></div><h6 style="color: #901594;right:10px;bottom:-20px;position: absolute;">by <a href="https://github.com/Rr210/" target="_blank">Harry</a></h6></div></body> '''
    msg.attach(MIMEText(content, 'html', 'utf-8'))  # attach the HTML body
    msg['Subject'] = "这里是自定义的主题"  # mail subject
    msg['From'] = msg_from               # sender header
    # BUG FIX: also set the To header — sendmail() only sets the SMTP
    # envelope, and strict servers/clients expect a To header.
    msg['To'] = to
    # BUG FIX: the SMTP connection was never closed; the context manager
    # issues QUIT and closes the socket even on error.
    with smtplib.SMTP_SSL("smtp.qq.com", 465) as s:
        s.login(msg_from, passwd)
        s.sendmail(msg_from, to, msg.as_string())


if __name__ == "__main__":
    postQQ('', '')
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
14. 压缩图片,还有base64格式的图片文件
import base64, io, os, random
from PIL import Image, ImageFile


def compress_image(index):
    """Shrink <cwd>\\<index>.png under a size budget by repeated
    downscaling (the file stays PNG; only its pixel size changes).

    :param index: numeric stem of the PNG file to compress
    :return: path of the compressed file, or None if no compression ran
    """
    outfile = os.getcwd() + f'\\{index}.png'
    mb = 190      # size budget in KB
    quality = 85  # save quality
    k = 0.9       # per-pass shrink factor
    o_size = os.path.getsize(outfile) // 1024
    print('压缩前得图片大小------' + str(o_size))
    if o_size <= mb:
        return reanme(index)
    ImageFile.LOAD_TRUNCATED_IMAGES = True
    while o_size > mb:
        im = Image.open(outfile)
        x, y = im.size
        # NOTE(review): Image.ANTIALIAS was removed in Pillow 10;
        # use Image.LANCZOS on current Pillow.
        out = im.resize((int(x * k), int(y * k)), Image.ANTIALIAS)
        try:
            out.save(outfile, quality=quality)
        except Exception as e:
            print(e)
            break
        o_size = os.path.getsize(outfile) // 1024
    print('压缩后得图片大小------' + str(o_size))
    reanme(index)
    return outfile


def reanme(index):
    """Prefix the PNG whose name starts with `index` with a random tag."""
    ran_name = 'img_' + str(random.random()).replace('.', '') + '_' + str(random.randint(1, 100000))
    img_path = os.getcwd()
    img_list = os.listdir(img_path)
    for img in img_list:
        # BUG FIX: combine the two boolean tests with logical `and`
        # instead of bitwise `&` (same result on bools, correct idiom
        # and short-circuiting).
        if img.endswith('.png') and img.startswith(f'{index}'):
            src = os.path.join(os.path.abspath(img_path), img)  # old name
            dst = os.path.join(os.path.abspath(img_path), ran_name + img)
            os.rename(src, dst)  # overwrite the old name


def compress_image_bs4(b64, mb=190, k=0.9):
    """Compress a base64-encoded image under `mb` KB by repeated
    downscaling; the result is re-encoded as JPEG once resizing starts.

    :param b64: base64 string of the source image
    :param mb: size budget in KB
    :param k: per-pass shrink factor
    :return: base64 string of the (possibly re-encoded) result
    """
    f = base64.b64decode(b64)
    with io.BytesIO(f) as im:
        o_size = len(im.getvalue()) // 1024
        if o_size <= mb:
            return b64
        im_out = im
        while o_size > mb:
            img = Image.open(im_out)
            x, y = img.size
            out = img.resize((int(x * k), int(y * k)), Image.ANTIALIAS)
            im_out.close()
            im_out = io.BytesIO()
            out.save(im_out, 'jpeg')
            o_size = len(im_out.getvalue()) // 1024
        b64 = base64.b64encode(im_out.getvalue())
        im_out.close()
        return str(b64, encoding='utf8')


if __name__ == "__main__":
    # Compress 0.png .. 5.png in the working directory
    for i in range(0, 6):
        compress_image(i)
        print('----------------')
        print(f'第{i}张压缩成功!!')
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 79
- 80
- 81
- 82
15. 图片的灰度处理
from PIL import Image
from numpy import *
# BUG FIX: `os` is used below but was never imported
# (`from numpy import *` does not provide it).
import os

input_dir = './pics/'      # trailing / required so paths concatenate correctly
out_dir1 = './pics_gray/'  # grayscale output folder

# 3.1 grayscale conversion
a = os.listdir(input_dir)
for i in a:
    print(i)
    Img = Image.open(input_dir + i)  # read each picture with PIL
    Img_gray = Img.convert('L')      # convert to 8-bit grayscale ('L' mode)
    Img_gray.save(out_dir1 + i)      # save under the same name
    print('~~~~~~~~~~~~~This is a picture after graying~~~~~~~~~~')
    print(Img_gray)
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
16. 图片去重
import cv2
import matplotlib.pyplot as plt
import os
import numpy as np


# Variance of a list of numbers (average squared deviation from the mean).
def getss(list):
    # mean of the values
    avg = sum(list) / len(list)
    # accumulate the variance
    ss = 0
    for l in list:
        ss += (l - avg) * (l - avg) / len(list)
    return ss


# Per-row pixel averages of an 8x8 grayscale thumbnail — a tiny
# perceptual fingerprint of the image.
def getdiff(img):
    # thumbnail edge length
    Sidelength = 8
    # downscale, then grayscale
    img = cv2.resize(img, (Sidelength, Sidelength), interpolation=cv2.INTER_CUBIC)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # avglist holds one mean per row of the thumbnail
    avglist = []
    for i in range(Sidelength):
        avg = sum(gray[i]) / len(gray[i])
        avglist.append(avg)
    return avglist


# Walk every subfolder of photo1 and delete images whose fingerprint
# variance is within 10 of an earlier image's (treated as duplicates).
list_name = os.listdir('photo1')
for word in list_name:
    path = os.getcwd() + '\\photo1\\' + word
    for dirpath, dirnames, filenames in os.walk(path):  # dir path, dir names, file names
        # variance fingerprint of each file, in filenames order
        list = []
        for filename in filenames:
            path = dirpath + '\\' + filename
            print(filename)
            # imdecode + np.fromfile handles non-ASCII (Chinese) paths,
            # which cv2.imread cannot
            img = cv2.imdecode(np.fromfile(path, dtype=np.uint8), cv2.IMREAD_UNCHANGED)
            gtess = getss(getdiff(img))
            list.append(gtess)
        print(list)
        for i in range(len(list)):
            print('i=', i)
            # `a` collects indices already deleted from disk; their list
            # entries are removed in one pass when j runs off the end
            a = []
            for j in range(i + 1, len(list) + 1):
                # range() was computed from the original length, so j can
                # run past the (shrinking) list — that overflow is used as
                # the signal to flush the pending deletions
                if len(list) <= j:
                    print('超出范围!!!!!!!!!!!!!!!')
                    # f offsets later indices after each del so the
                    # positions in `a` stay valid
                    f = 0
                    for c in range(len(a)):
                        b = a[c]
                        print('b====', b)
                        print('前list:', list)
                        del list[b - f]
                        f += 1
                        print('删除后的list:', list)
                    break
                elif abs(list[i] - list[j]) < 10:
                    print('有相同的!!')
                    # Re-scan the folder and delete the file whose
                    # fingerprint matches the duplicate entry list[j]
                    for filename in filenames:
                        print(filename)
                        path = dirpath + '\\' + filename
                        img = cv2.imdecode(np.fromfile(path, dtype=np.uint8), cv2.IMREAD_UNCHANGED)
                        gtess = getss(getdiff(img))
                        # delete the image whose variance equals the stored one
                        if gtess == float(list[j]):
                            os.remove(path)
                            filenames.remove(filename)
                            # Record j for the deferred list cleanup above:
                            # deleting the list entry immediately would make
                            # the index scan skip items, hence the batch
                            # deletion when j overruns the list.
                            a.append(j)
                            print('a===', a)
                            print('删除:', path)
                            break
                        else:
                            continue
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 79
- 80
- 81
- 82
- 83
- 84
- 85
- 86
- 87
- 88
- 89
- 90
- 91
- 92
- 93
17. 钉钉机器人发送通知
- 钉钉官方文档【https://open.dingtalk.com/document/robots/custom-robot-access】
# BUG FIX: import urllib.parse explicitly — `import urllib` alone does
# not guarantee the `parse` submodule is loaded.
import urllib.parse
import requests, json, hmac, hashlib, base64, time


def notice(text):
    """
    Send a markdown message through a DingTalk group robot webhook.
    :param text: content appended to the markdown body
    :return: None
    """
    timestamp = str(round(time.time() * 1000))
    secret = ''  # the robot's signing secret
    http = ''    # the robot's webhook URL
    secret_enc = secret.encode('utf-8')
    # BUG FIX: per the DingTalk signing spec the string to sign is
    # "<timestamp>\n<secret>"; the newline had been lost.
    string_to_sign = '{}\n{}'.format(timestamp, secret)
    string_to_sign_enc = string_to_sign.encode('utf-8')
    hmac_code = hmac.new(secret_enc, string_to_sign_enc, digestmod=hashlib.sha256).digest()
    sign = urllib.parse.quote_plus(base64.b64encode(hmac_code))
    headers = {'Content-Type': 'application/json'}  # payload is JSON
    # BUG FIX: the original URL contained '×tamp' — a '&times'-mangled
    # '&timestamp' — so the signature query parameter was never valid.
    webhook = f'{http}&timestamp={timestamp}&sign={sign}'
    data = {
        "msgtype": "markdown",
        "markdown": {
            "title": "这是定义的标题",
            "text": "> 发送的内容%s" % text
        }
    }
    requests.post(webhook, data=json.dumps(data), headers=headers)


notice('')
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
18. 批量修改文件夹中的图片名称,并进行压缩
# !/usr/bin/python
# -*- coding:utf-8 -*-
import base64, io, os, random
from PIL import Image, ImageFile


def compress_image(img_path):
    """Repeatedly downscale `img_path` in place until it fits a 190 KB
    budget (pixel size changes, format does not).

    :param img_path: path of the image file to compress
    :return: the compressed file's path, or None if it was small enough
    """
    outfile = img_path
    kb = 190       # size budget in KB
    quality = 85   # save quality
    k = 0.9        # per-pass shrink factor
    o_size = os.path.getsize(outfile) // 1024
    print('图片:{}'.format(img_path.split("\\")[-1]))
    print('压缩前得图片大小------' + str(o_size) + 'kb')
    if o_size <= kb:
        print('该图片{}不需要压缩'.format(img_path.split("\\")[-1]))
        print('-------------------------')
        return
    ImageFile.LOAD_TRUNCATED_IMAGES = True
    while o_size > kb:
        picture = Image.open(outfile)
        w, h = picture.size
        shrunk = picture.resize((int(w * k), int(h * k)), Image.ANTIALIAS)
        try:
            shrunk.save(outfile, quality=quality)
        except Exception as err:
            print(err)
            break
        o_size = os.path.getsize(outfile) // 1024
    print('压缩后得图片大小------' + str(o_size) + 'kb')
    print('-------------------------')
    return outfile


def muilfile():
    """Randomize the name of every image under ./img, then compress it."""
    img_path = os.getcwd() + '/img'
    img_format = ['png', 'jpg', 'JPG', 'webp', 'gif', 'jpeg']
    for img in os.listdir(img_path):
        if img.split('.')[-1] not in img_format:
            continue
        src = os.path.join(os.path.abspath(img_path), img)  # current path
        # New name: hr_<random int><4 random letters>.<original extension>
        letters = random.sample('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', 4)
        renamed = "hr_" + str(random.randint(0, 10000)) + "".join(letters) + "." + img.split('.')[-1]
        dst = os.path.join(os.path.abspath(img_path), renamed)
        os.rename(src, dst)  # overwrite the old name
        compress_image(dst)
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
公众号:今日在学
一起学习可好?扫码关注,了解更多!!