|
|
|
@ -8,12 +8,15 @@ from base import create_csv, TARGET_DIR, MODEL_LIST, RAW_DIR
|
|
|
|
|
import os |
|
|
|
|
import csv |
|
|
|
|
import re |
|
|
|
|
import sys |
|
|
|
|
|
|
|
|
|
FN_CONST = 19 |
|
|
|
|
|
|
|
|
|
def get_retrieved_yynn_count(model): |
|
|
|
|
|
|
|
|
|
def get_retrieved_yynn_count(model, **kwargs): |
|
|
|
|
result = {} |
|
|
|
|
for i in '1234': |
|
|
|
|
order = kwargs.get('order', '1234') |
|
|
|
|
for i in order: |
|
|
|
|
P = os.path.join(TARGET_DIR, 'part7', '%s-%s.csv' % (model, i)) |
|
|
|
|
with open(P, 'rb') as f: |
|
|
|
|
rows = csv.reader(f) |
|
|
|
@ -34,11 +37,13 @@ def process_file(seven_data, data, header):
|
|
|
|
|
for i in xrange(start_ind, len(data)): |
|
|
|
|
R_id = header[i] |
|
|
|
|
if R_id not in seven_data: |
|
|
|
|
print("%s missing in step7" % R_id) |
|
|
|
|
continue |
|
|
|
|
TP = float(data[i]) |
|
|
|
|
FP = float(seven_data[R_id]) - TP |
|
|
|
|
FN = float(FN_CONST) - TP |
|
|
|
|
# print(data[0]) |
|
|
|
|
# print(seven_data) |
|
|
|
|
# print('FP = %s - %s = %s' % (seven_data[R_id], TP, FP)) |
|
|
|
|
# print('TP: %s, FP: %s, FN: %s' % (TP, FP, FN)) |
|
|
|
|
|
|
|
|
|
P = (TP / (TP + FP)) * 100 |
|
|
|
@ -47,7 +52,8 @@ def process_file(seven_data, data, header):
|
|
|
|
|
F = 2 * (P * R) / (P + R) |
|
|
|
|
except ZeroDivisionError: |
|
|
|
|
F = 'inf' |
|
|
|
|
print('ZeroDivisionError\n %s -- R_id: %s | P: %s | R: %s' % (data[0], R_id, P, R)) |
|
|
|
|
# print('\nZeroDivisionError: ' |
|
|
|
|
# '%s -- R_id: %s | P: %s | R: %s' % (data[0], R_id, P, R), end='') |
|
|
|
|
|
|
|
|
|
prf_output.append({ |
|
|
|
|
'R_id': R_id, |
|
|
|
@ -63,10 +69,16 @@ def loop_thru_step8():
|
|
|
|
|
BASE_PATH = os.path.join(TARGET_DIR, 'part8') |
|
|
|
|
for i in os.walk(BASE_PATH): |
|
|
|
|
for j in i[2]: |
|
|
|
|
print('\r >> processing %s ' % j, end='') |
|
|
|
|
sys.stdout.flush() |
|
|
|
|
output = [] |
|
|
|
|
seven = get_retrieved_yynn_count(j[:4]) |
|
|
|
|
''' |
|
|
|
|
Nah, getting wrong data from step 7 |
|
|
|
|
''' |
|
|
|
|
_m = j[:4] |
|
|
|
|
_o = j[5] |
|
|
|
|
seven = get_retrieved_yynn_count(_m, **{'order': _o}) |
|
|
|
|
fpath = os.path.join(BASE_PATH, j) |
|
|
|
|
# print(fpath) |
|
|
|
|
with open(fpath, 'rb') as f: |
|
|
|
|
output_header = ['lo_id', j[:4]] |
|
|
|
|
output_sub_header = ['', '', ] |
|
|
|
@ -88,5 +100,8 @@ def loop_thru_step8():
|
|
|
|
|
result = [output_header] + [output_sub_header] + output |
|
|
|
|
create_csv(output_name, result, directory='part9') |
|
|
|
|
is_header_created = False |
|
|
|
|
print('\r >> done %s ' % j, end='') |
|
|
|
|
sys.stdout.flush() |
|
|
|
|
print('') |
|
|
|
|
|
|
|
|
|
loop_thru_step8() |
|
|
|
|