You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
93 lines
2.7 KiB
93 lines
2.7 KiB
11 years ago
|
#!/usr/bin/env python
|
||
|
# -*- coding:utf-8 -*-
|
||
|
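'''
Step 9

Reads every CSV found under TARGET_DIR/part8, computes precision (P),
recall (R) and F-measure (F) per 'R...' column using the step-7 counts
from TARGET_DIR/part7, and hands the result to create_csv with
directory='part9'.
'''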
|
||
|
from __future__ import print_function
|
||
|
from base import create_csv, TARGET_DIR, MODEL_LIST, RAW_DIR
|
||
|
import os
|
||
|
import csv
|
||
|
import re
|
||
|
|
||
|
# process_file subtracts the true-positive count from this value to obtain
# the false-negative count, so it acts as the fixed total TP + FN.
# NOTE(review): presumably the number of relevant items per row -- confirm.
FN_CONST = 19
|
||
|
|
||
|
def get_retrieved_yynn_count(model, parts='1234', base_dir=None):
    '''
    Map the first cell of each step-7 CSV row to that row's last cell.

    For every suffix in ``parts`` the file
    ``<base_dir>/part7/<model>-<suffix>.csv`` is read, in order.  When the
    same first cell occurs more than once (inside one file or across
    files) the first occurrence wins.

    :param model: file-name prefix of the step-7 CSV files.
    :param parts: iterable of file-name suffixes to read, in order.
        Defaults to ``'1234'`` (the files ``-1`` to ``-4``).
    :param base_dir: directory that contains the ``part7`` folder.
        Defaults to the module-level ``TARGET_DIR``.
    :returns: dict ``{first cell: last cell}``; values are the raw strings
        read from the CSV, not numbers.
    :raises IOError: if one of the expected files cannot be opened.
    '''
    import sys

    if base_dir is None:
        base_dir = TARGET_DIR

    # The csv module wants a binary file on Python 2 but a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] >= 3:
        open_kwargs = {'mode': 'r', 'newline': ''}
    else:
        open_kwargs = {'mode': 'rb'}

    result = {}
    for part in parts:
        path = os.path.join(base_dir, 'part7', '%s-%s.csv' % (model, part))
        with open(path, **open_kwargs) as f:
            for row in csv.reader(f):
                if not row:
                    # csv.reader yields [] for blank lines; indexing it
                    # would raise IndexError.
                    continue
                # setdefault keeps the value of the first occurrence.
                result.setdefault(row[0], row[-1])
    return result
|
||
|
|
||
|
|
||
|
def _percentage(numerator, denominator):
    '''Return numerator / denominator * 100, or 0.0 for a zero denominator.'''
    if not denominator:
        return 0.0
    return (numerator / denominator) * 100


def process_file(seven_data, data, header, fn_const=None):
    '''
    Compute precision, recall and F-measure for each 'R...' column of a row.

    Processing starts at the first header cell that begins with ``'R'``
    (column 0 when there is none) and covers every following column that
    has both a header cell and a data cell.  For each such column the data
    cell is taken as the true-positive count, ``seven_data[R_id]`` minus
    that as the false positives, and ``fn_const`` minus that as the false
    negatives.

    :param seven_data: mapping of column id to the step-7 count (number or
        numeric string).  Columns missing from it are reported and skipped.
    :param data: one CSV data row; ``data[0]`` is only used in a message.
    :param header: the CSV header row matching ``data``.
    :param fn_const: total used as TP + FN.  Defaults to the module-level
        ``FN_CONST``.
    :returns: list of dicts with keys ``'R_id'``, ``'P'``, ``'R'`` and
        ``'F'``.  ``P`` and ``R`` are percentages; ``F`` is the string
        ``'inf'`` when ``P + R`` is zero.
    :raises ValueError: if a cell or count cannot be converted to float.
    '''
    if fn_const is None:
        fn_const = FN_CONST

    # Index of the first header cell starting with 'R'; 0 when none does.
    start_ind = next(
        (ind for ind, name in enumerate(header) if name.startswith('R')), 0)

    prf_output = []
    # zip stops at the shorter sequence, so a row with more cells than the
    # header no longer runs off the end of the header.
    for R_id, cell in zip(header[start_ind:], data[start_ind:]):
        if R_id not in seven_data:
            print("%s missing in step7" % R_id)
            continue

        TP = float(cell)
        FP = float(seven_data[R_id]) - TP
        FN = float(fn_const) - TP

        # A zero denominator (nothing retrieved / fn_const of 0) gives 0.0
        # instead of an uncaught ZeroDivisionError.
        P = _percentage(TP, TP + FP)
        R = _percentage(TP, TP + FN)
        try:
            F = 2 * (P * R) / (P + R)
        except ZeroDivisionError:
            # NOTE(review): 'inf' is kept from the original; F is more
            # commonly defined as 0 when P and R are both 0 -- confirm.
            F = 'inf'
            print('ZeroDivisionError\n %s -- R_id: %s | P: %s | R: %s' % (data[0], R_id, P, R))

        prf_output.append({
            'R_id': R_id,
            "P": P,
            "R": R,
            "F": F,
        })
    return prf_output
|
||
|
|
||
|
|
||
|
def loop_thru_step8():
    '''
    Build one P/R/F table per file found under TARGET_DIR/part8.

    For every file yielded by os.walk the first four characters of the
    file name are used as the model name.  The step-7 counts for that model
    are loaded with get_retrieved_yynn_count, each data row is passed to
    process_file, and the resulting table is handed to create_csv under the
    same file name with directory='part9'.

    The table starts with two header rows: the first holds 'lo_id', the
    model name and each R id followed by two empty cells; the second holds
    'P', 'R', 'F' under each R id.  Both are derived from the first
    non-blank data row.  Empty files are reported and skipped.
    '''
    import sys

    # The csv module wants a binary file on Python 2 but a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] >= 3:
        open_kwargs = {'mode': 'r', 'newline': ''}
    else:
        open_kwargs = {'mode': 'rb'}

    base_path = os.path.join(TARGET_DIR, 'part8')
    for dirpath, _dirnames, filenames in os.walk(base_path):
        for filename in filenames:
            model = filename[:4]
            seven = get_retrieved_yynn_count(model)
            output_header = ['lo_id', model]
            output_sub_header = ['', '']
            output = []

            # Join with dirpath (not base_path) so files that os.walk finds
            # in subdirectories can actually be opened.
            with open(os.path.join(dirpath, filename), **open_kwargs) as f:
                rows = csv.reader(f)
                header = next(rows, None)
                if header is None:
                    print('%s is empty, skipping' % filename)
                    continue

                is_header_created = False
                for r in rows:
                    if not r:
                        # Blank line: csv.reader yields [], nothing to do.
                        continue
                    prf = process_file(seven, r, header)
                    output_row = [r[0], model]
                    for entry in prf:
                        output_row += [entry['P'], entry['R'], entry['F']]
                    if not is_header_created:
                        # Header cells come from the first processed row only.
                        for entry in prf:
                            output_header += [entry['R_id'], '', '']
                            output_sub_header += ['P', 'R', 'F']
                        is_header_created = True
                    output.append(output_row)

            result = [output_header, output_sub_header] + output
            create_csv(filename, result, directory='part9')
|
||
|
|
||
|
# Runs the whole step at module level, i.e. both when the file is executed
# as a script and when it is imported.
loop_thru_step8()
|