#!/usr/bin/env python
# -*- coding:utf-8 -*-
'''
Step 9

For every CSV found under ``<TARGET_DIR>/part8`` compute, per ``R...`` column,
the percentages P = TP / (TP + FP), R = TP / (TP + FN) and
F = 2 * P * R / (P + R), and write them to ``part9`` through ``create_csv``.
'''
from __future__ import print_function
from base import create_csv, TARGET_DIR, MODEL_LIST, RAW_DIR
import os
import csv
import re
import sys

# FN is computed as FN_CONST - TP, so TP + FN equals FN_CONST.
# NOTE(review): presumably the number of relevant items per row -- confirm.
FN_CONST = 19

# A header cell starting with "R" marks the first per-R_id column.
_R_COLUMN = re.compile(r'^R')


def _open_csv(path):
    '''Open *path* for reading in the mode ``csv.reader`` expects.'''
    if sys.version_info[0] >= 3:
        # Python 3: text mode, newline handling left to the csv module.
        return open(path, 'r', newline='')
    # Python 2: the csv module wants a binary-mode file.
    return open(path, 'rb')


def _percent(numerator, denominator):
    '''Return ``numerator / denominator * 100``, or 0.0 for a zero denominator.'''
    if not denominator:
        return 0.0
    return (numerator / denominator) * 100


def get_retrieved_yynn_count(model):
    '''
    Map the first cell of every step-7 row to that row's last cell.

    Reads ``<TARGET_DIR>/part7/<model>-<n>.csv`` for n = 1..4. When a key
    shows up more than once, the first value read is kept.

    :param model: file-name prefix of the step-7 files.
    :returns: dict of ``row[0] -> row[-1]``; values are the raw CSV strings.
    '''
    result = {}
    for part in '1234':
        path = os.path.join(TARGET_DIR, 'part7', '%s-%s.csv' % (model, part))
        with _open_csv(path) as f:
            for row in csv.reader(f):
                if not row:
                    # csv.reader yields [] for blank lines; nothing to index.
                    continue
                result.setdefault(row[0], row[-1])
    return result


def process_file(seven_data, data, header):
    '''
    Compute P/R/F percentages for one step-8 data row.

    Columns are processed from the first header cell starting with ``R``
    (or from column 0 when there is none) up to the end of the row.

    :param seven_data: dict as returned by ``get_retrieved_yynn_count``;
        the value for an R_id is used as TP + FP.
    :param data: one CSV row; cells from the first ``R`` column on must be
        parseable by ``float``.
    :param header: the header row naming each column of *data*.
    :returns: list of dicts with keys ``R_id``, ``P``, ``R`` and ``F``.
        ``F`` is the string ``'inf'`` when P + R is zero. R_ids missing
        from *seven_data* are reported on stdout and skipped.
    :raises ValueError: if a processed cell is not numeric.
    '''
    start_ind = 0
    for ind, name in enumerate(header):
        if _R_COLUMN.search(name):
            start_ind = ind
            break

    prf_output = []
    # Bound by both lengths so a row longer than the header cannot raise
    # IndexError on header[ind].
    for ind in range(start_ind, min(len(data), len(header))):
        R_id = header[ind]
        if R_id not in seven_data:
            print("%s missing in step7" % R_id)
            continue
        TP = float(data[ind])
        FP = float(seven_data[R_id]) - TP
        FN = float(FN_CONST) - TP

        # A zero step-7 count used to raise ZeroDivisionError here; report
        # 0.0 instead so the remaining columns and rows are still processed.
        P = _percent(TP, TP + FP)
        R = _percent(TP, TP + FN)
        try:
            F = 2 * (P * R) / (P + R)
        except ZeroDivisionError:
            # Placeholder kept as in the original output format.
            F = 'inf'
            print('ZeroDivisionError\n %s -- R_id: %s | P: %s | R: %s' % (data[0], R_id, P, R))

        prf_output.append({
            'R_id': R_id,
            "P": P,
            "R": R,
            "F": F,
        })
    return prf_output


def loop_thru_step8():
    '''
    Turn every step-8 CSV into a step-9 CSV of P/R/F values.

    Each output has two header rows (R_ids, then ``P``/``R``/``F`` labels)
    followed by one row per input data row, and is written with
    ``create_csv(<same file name>, rows, directory='part9')``.
    The first four characters of the file name are used as the model name.
    '''
    BASE_PATH = os.path.join(TARGET_DIR, 'part8')
    # Step-7 counts depend only on the model, so read them once per model
    # instead of once per step-8 file.
    seven_by_model = {}
    for dirpath, _dirnames, filenames in os.walk(BASE_PATH):
        for fname in filenames:
            model = fname[:4]
            if model not in seven_by_model:
                seven_by_model[model] = get_retrieved_yynn_count(model)
            seven = seven_by_model[model]

            output = []
            output_header = ['lo_id', model]
            output_sub_header = ['', '']
            is_header_created = False
            # Join with dirpath (not BASE_PATH) so files that os.walk finds
            # in sub-directories are opened from where they actually live.
            with _open_csv(os.path.join(dirpath, fname)) as f:
                rows = csv.reader(f)
                header = next(rows, None)
                if header is None:
                    print('%s is empty, skipped' % fname)
                    continue
                for r in rows:
                    if not r:
                        # Blank line in the CSV.
                        continue
                    output_row = [r[0], model]
                    for item in process_file(seven, r, header):
                        # Header cells are collected from the first data row
                        # only; every row shares the same header.
                        if not is_header_created:
                            output_header += [item['R_id'], '', '']
                            output_sub_header += ['P', 'R', 'F']
                        output_row += [item['P'], item['R'], item['F']]
                    output.append(output_row)
                    is_header_created = True

            result = [output_header] + [output_sub_header] + output
            create_csv(fname, result, directory='part9')


# NOTE(review): runs on import as well as when executed as a script; left
# unguarded so existing callers that rely on the import side effect still work.
loop_thru_step8()

# NOTE(review): this source was recovered from a whitespace-collapsed git
# patch. Besides adding this file (new file mode 100755), the patch changed
# main() in s8.py. main() is not fully visible, so the hunks are recorded
# here verbatim (original indentation was lost) rather than applied:
#
#   diff --git a/s8.py b/s8.py
#   index 80a2806..5ba69b4 100755
#   --- a/s8.py
#   +++ b/s8.py
#   @@ -80,11 +80,9 @@ def main():
#    m_header = header[:]
#    m_header[1] = m
#    ros = get_ro_data(m, i)
#   -for r in ros:
#   -m_header.append(r[0])
#   +m_header += [r[0] for r in ros]
#    result = middleman(lo_data, ros)
#    output_name = '%s-%s.csv' % (m, i)
#   -
#    # process header -- get ind of first R_xxx
#    number_ind = 0
#    cnt_ind = 0
#   @@ -96,7 +94,7 @@ def main():
#    break
#    except ValueError:
#    cnt_ind += 1
#   -__h = m_header[:2] + ['_' for i in xrange(0, number_ind-2)] + m_header[3:]
#   +__h = m_header[:2] + ['_' for i in xrange(0, number_ind-2)] + m_header[2:]
#    result = [__h] + result
#    create_csv(output_name, result, directory='part8')