#!/usr/bin/env python # -*- coding:utf-8 -*- ''' Step 9 ''' from __future__ import print_function from base import create_csv, TARGET_DIR, MODEL_LIST, RAW_DIR import os import csv import re FN_CONST = 19 def get_retrieved_yynn_count(model): result = {} for i in '1234': P = os.path.join(TARGET_DIR, 'part7', '%s-%s.csv' % (model, i)) with open(P, 'rb') as f: rows = csv.reader(f) for row in rows: if row[0] not in result: result[row[0]] = row[len(row)-1] return result def process_file(seven_data, data, header): start_ind = 0 for i in xrange(0, len(header)): if re.search(r'^R', header[i]): start_ind = i break prf_output = [] for i in xrange(start_ind, len(data)): R_id = header[i] if R_id not in seven_data: print("%s missing in step7" % R_id) continue TP = float(data[i]) FP = float(seven_data[R_id]) - TP FN = float(FN_CONST) - TP # print('TP: %s, FP: %s, FN: %s' % (TP, FP, FN)) P = (TP / (TP + FP)) * 100 R = (TP / (TP + FN)) * 100 try: F = 2 * (P * R) / (P + R) except ZeroDivisionError: F = 'inf' print('ZeroDivisionError\n %s -- R_id: %s | P: %s | R: %s' % (data[0], R_id, P, R)) prf_output.append({ 'R_id': R_id, "P": P, "R": R, "F": F, }) # print(prf_output) return prf_output def loop_thru_step8(): BASE_PATH = os.path.join(TARGET_DIR, 'part8') for i in os.walk(BASE_PATH): for j in i[2]: output = [] seven = get_retrieved_yynn_count(j[:4]) fpath = os.path.join(BASE_PATH, j) # print(fpath) with open(fpath, 'rb') as f: output_header = ['lo_id', j[:4]] output_sub_header = ['', '', ] rows = csv.reader(f) header = rows.next() is_header_created = False for r in rows: output_row = [r[0], j[:4]] prf = process_file(seven, r, header) for i in prf: if not is_header_created: output_header += [i['R_id'], '', ''] output_sub_header += ['P', 'R', 'F'] output_row += [i['P'], i['R'], i['F']] output.append(output_row) is_header_created = True output_name = j result = [output_header] + [output_sub_header] + output create_csv(output_name, result, directory='part9') is_header_created = False loop_thru_step8()