-
Notifications
You must be signed in to change notification settings - Fork 0
/
tfidf-table.py
80 lines (72 loc) · 1.99 KB
/
tfidf-table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from collections import defaultdict
# Split table.csv into fixed-size chunk files, scaling the numeric cells.
#
# The first line of the input is a comma-separated header.  Every following
# line is one data row.  Rows are written, CHUNK_SIZE at a time, to
# tfidf-0.csv, tfidf-1.csv, ...; only tfidf-0.csv carries the header line.
# Each cell that parses as a float is divided by the number of header columns
# and rounded to 5 decimals, except in the columns named in UNSCALED_COLUMNS.
#
# NOTE: fields are split on bare commas, so quoted fields that contain a
# comma are not supported.

CHUNK_SIZE = 2000   # data rows per output file
NUM_CHUNKS = 9      # number of output files attempted
# Last data row handled by the final chunk.
# NOTE(review): presumably the row count of the intended table.csv -- confirm.
LAST_ROW = 17264
# Columns whose cells are copied through verbatim, never scaled.
UNSCALED_COLUMNS = ('doc-title', 'Class-Label', '')


def _split_fields(line):
    """Return the comma-separated fields of *line* without its line ending."""
    return line.rstrip('\r\n').split(',')


def _scale_row(header, fields, divisor):
    """Return *fields* with the numeric cells divided by *divisor*.

    Cells are paired with *header* by position, so fields beyond the header
    width are dropped and a short row stays short.  A cell is left untouched
    when its column is listed in UNSCALED_COLUMNS or when it is not a number.
    """
    scaled = []
    for name, cell in zip(header, fields):
        if name not in UNSCALED_COLUMNS:
            try:
                cell = str(round(float(cell) / divisor, 5))
            except ValueError:
                pass  # non-numeric cell: keep the original text
        scaled.append(cell)
    return scaled


def main(source='table.csv', prefix='tfidf-', chunk_size=CHUNK_SIZE,
         num_chunks=NUM_CHUNKS, last_row=LAST_ROW):
    """Write the scaled rows of *source* into numbered chunk files.

    Chunk ``c`` holds data rows ``c * chunk_size + 1`` through
    ``(c + 1) * chunk_size`` (1-based, inclusive); the final chunk instead
    ends at *last_row*.  Chunks are contiguous and do not overlap.  A chunk
    with no rows produces no file.

    Args:
        source: Path of the input CSV.
        prefix: Output path prefix; chunk ``c`` goes to ``prefix + c + '.csv'``.
        chunk_size: Number of data rows per chunk.
        num_chunks: Number of chunks to attempt.
        last_row: Last data row included in the final chunk.

    Returns:
        The list of output paths that were written, in chunk order.

    Raises:
        IOError/OSError: If *source* cannot be read.  A failure while writing
            one chunk is printed and the remaining chunks are still attempted.
    """
    with open(source, 'r') as src:
        lines = src.readlines()
    if not lines:
        return []

    header = _split_fields(lines[0])
    # The divisor is the header width; it is computed once so that no
    # per-chunk loop state can change it.
    divisor = float(len(header))

    written = []
    for chunk in range(num_chunks):
        first = chunk * chunk_size + 1
        if chunk == num_chunks - 1:
            last = last_row
        else:
            last = (chunk + 1) * chunk_size

        print("Hello World")
        # lines[0] is the header, so data row r lives at lines[r].
        rows = [_split_fields(line) for line in lines[first:last + 1]]
        if not rows:
            continue
        print("checkpoint")

        scaled = [_scale_row(header, fields, divisor) for fields in rows]
        print("Hello World Again")

        path = prefix + str(chunk) + '.csv'
        try:
            with open(path, 'w') as out:
                if chunk == 0:
                    out.write(','.join(header) + '\n')
                out.writelines(','.join(row) + '\n' for row in scaled)
        except (IOError, OSError) as e:
            # Best effort: report the failure and carry on with later chunks.
            print(e)
            continue
        print("choose to see the positive.")
        written.append(path)
    return written


if __name__ == "__main__":
    main()