Voyz's Studio.

python爬虫和数据处理

字数统计: 134 | 阅读时长: 1 min
2019/06/20 Share

python爬虫和数据处理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#coding=utf-8
import os
import pandas as pd
import glob

# Path of the merged CSV: combine() appends to it and the script entry
# point passes it to dup(), which rewrites it in place.
target_csv = '../data/train_data.csv'

def combine():
    """Append every CSV found in ``../data/pre_data/`` to ``target_csv``.

    Each input file is copied verbatim, in the order ``glob`` returns the
    paths, so any header line an input carries is copied along with its
    rows. The target is opened in append mode: existing content is kept,
    and running this again appends the same data a second time.
    """
    csv_list = glob.glob('../data/pre_data/*.csv')
    print(u'共发现%s个CSV文件'% len(csv_list))
    print(u'正在处理............')
    for path in csv_list:
        # Read inside a context manager so the input handle is closed
        # promptly instead of being left to the garbage collector.
        with open(path, 'r') as src:
            content = src.read()
        # Open the target only after a successful read, so an unreadable
        # input never creates or touches the target file.
        with open(target_csv, 'a') as dst:
            dst.write(content)
    print(u'合并完毕!')


def dup(file):
    """De-duplicate a CSV in place on its ``期号-1`` column.

    The file is loaded with pandas, sorted ascending by ``期号-1``, reduced
    to one row per distinct ``期号-1`` value (the first occurrence in file
    order), stripped of its final row after sorting, and written back to
    the same path without the index column.

    Args:
        file: Path of the CSV to rewrite. It must contain a ``期号-1``
            column, otherwise pandas raises ``KeyError``.
    """
    df = pd.read_csv(file)
    # mergesort is stable, so rows sharing a key keep their file order and
    # keep='first' below deterministically retains the earliest one.
    df = df.sort_values(by='期号-1', kind='mergesort')
    datalist = df.drop_duplicates(subset='期号-1', keep='first')
    # Drop the last (largest-key) row by position. Dropping by the label of
    # df.tail(1) raised KeyError whenever that row had already been removed
    # as a duplicate; positional slicing gives the same result otherwise.
    # NOTE(review): presumably this discards a stray header line or an
    # incomplete latest record - confirm the intent with the data owner.
    datalist = datalist.iloc[:-1]
    datalist.to_csv(file, index=False, sep=',')

if __name__ == '__main__':
    # Script entry point: merge the per-source CSVs into target_csv first,
    # then de-duplicate that merged file in place.
    combine()
    dup(file=target_csv)
CATALOG
  1. python爬虫和数据处理