Browse Source

[aug] s9

master
sipp11 11 years ago
parent
commit
c1bf03abd3
  1. 6
      s8.py
  2. 92
      s9.py

6
s8.py

@ -80,11 +80,9 @@ def main():
m_header = header[:]
m_header[1] = m
ros = get_ro_data(m, i)
for r in ros:
m_header.append(r[0])
m_header += [r[0] for r in ros]
result = middleman(lo_data, ros)
output_name = '%s-%s.csv' % (m, i)
# process header -- get ind of first R_xxx
number_ind = 0
cnt_ind = 0
@ -96,7 +94,7 @@ def main():
break
except ValueError:
cnt_ind += 1
__h = m_header[:2] + ['_' for i in xrange(0, number_ind-2)] + m_header[3:]
__h = m_header[:2] + ['_' for i in xrange(0, number_ind-2)] + m_header[2:]
result = [__h] + result
create_csv(output_name, result, directory='part8')

92
s9.py

@ -0,0 +1,92 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
'''
Step 9
'''
from __future__ import print_function
from base import create_csv, TARGET_DIR, MODEL_LIST, RAW_DIR
import os
import csv
import re
# Fixed total used by process_file: false negatives are computed as
# FN_CONST - TP, so TP + FN always equals this number.
FN_CONST = 19
def get_retrieved_yynn_count(model):
    '''
    Collect one count per id from the four step-7 files of a model.

    Reads ``<TARGET_DIR>/part7/<model>-<n>.csv`` for n in 1..4 and maps the
    first column of every row to the last column of that row. When the same
    first-column value appears more than once (inside one file or across
    files), the first occurrence wins.

    model -- model name used as the file-name prefix.

    Returns a dict {first column: last column}; values are the raw strings
    read from the CSV, not numbers.
    '''
    result = {}
    for part in '1234':
        path = os.path.join(TARGET_DIR, 'part7', '%s-%s.csv' % (model, part))
        # 'rb' is the mode the Python 2 csv module expects.
        with open(path, 'rb') as f:
            for row in csv.reader(f):
                # csv.reader yields [] for a blank line; indexing it would
                # raise IndexError, so skip such rows.
                if not row:
                    continue
                # setdefault keeps the first value seen for an id.
                result.setdefault(row[0], row[-1])
    return result
def process_file(seven_data, data, header, fn_const=None):
    '''
    Compute precision, recall and F-measure for every R column of one row.

    seven_data -- dict mapping an R id to its step-7 count (anything float()
                  accepts); FP is that count minus TP.
    data       -- one data row; every cell from the first R column onwards
                  is converted with float() and used as TP.
    header     -- header row aligned with ``data``. Processing starts at the
                  first entry that begins with 'R' (or at column 0 when no
                  entry does).
    fn_const   -- total from which TP is subtracted to obtain FN. Defaults
                  to the module-level FN_CONST.

    Returns a list of dicts with the keys 'R_id', 'P', 'R' and 'F'. P and R
    are percentages. F is the string 'inf' when P + R is zero. Ids that are
    not present in ``seven_data`` are reported on stdout and skipped.
    '''
    if fn_const is None:
        fn_const = FN_CONST

    # Locate the first R column; fall back to column 0 when none is found.
    start_ind = 0
    for idx, name in enumerate(header):
        if re.search(r'^R', name):
            start_ind = idx
            break

    prf_output = []
    # zip stops at the shorter of the two rows, so a data row that is longer
    # than the header no longer raises IndexError.
    for R_id, cell in zip(header[start_ind:], data[start_ind:]):
        if R_id not in seven_data:
            print("%s missing in step7" % R_id)
            continue
        TP = float(cell)
        FP = float(seven_data[R_id]) - TP
        FN = float(fn_const) - TP
        retrieved = TP + FP
        relevant = TP + FN
        # A zero denominator means there is nothing to score; report 0
        # instead of crashing with an uncaught ZeroDivisionError.
        P = (TP / retrieved) * 100 if retrieved else 0.0
        R = (TP / relevant) * 100 if relevant else 0.0
        try:
            F = 2 * (P * R) / (P + R)
        except ZeroDivisionError:
            F = 'inf'
            print('ZeroDivisionError\n %s -- R_id: %s | P: %s | R: %s' % (data[0], R_id, P, R))
        prf_output.append({
            'R_id': R_id,
            "P": P,
            "R": R,
            "F": F,
        })
    return prf_output
def loop_thru_step8():
    '''
    Build a step-9 precision/recall/F table for every step-8 CSV file.

    Walks ``<TARGET_DIR>/part8``. For each file found, the first four
    characters of the file name are taken as the model name, the step-7
    counts of that model are loaded, and every data row is passed through
    process_file. The result is written with create_csv into 'part9' under
    the same file name, preceded by two header rows: one carrying the R ids
    and one carrying the 'P' / 'R' / 'F' labels.

    Empty files are skipped and blank rows inside a file are ignored.
    '''
    BASE_PATH = os.path.join(TARGET_DIR, 'part8')
    # Several step-8 files can share a model; load its step-7 data once.
    seven_by_model = {}
    for dirpath, _dirnames, filenames in os.walk(BASE_PATH):
        for fname in filenames:
            model = fname[:4]
            if model not in seven_by_model:
                seven_by_model[model] = get_retrieved_yynn_count(model)
            seven = seven_by_model[model]

            # os.walk reports file names relative to dirpath, so join with
            # dirpath (not BASE_PATH) to reach files in subdirectories.
            fpath = os.path.join(dirpath, fname)
            output = []
            output_header = ['lo_id', model]
            output_sub_header = ['', '']
            with open(fpath, 'rb') as f:
                rows = csv.reader(f)
                header = next(rows, None)
                if header is None:
                    # Empty file: nothing to score.
                    continue
                is_header_created = False
                for r in rows:
                    if not r:
                        # Blank line in the CSV.
                        continue
                    output_row = [r[0], model]
                    for prf in process_file(seven, r, header):
                        # The two header rows are derived from the first
                        # data row only.
                        if not is_header_created:
                            output_header += [prf['R_id'], '', '']
                            output_sub_header += ['P', 'R', 'F']
                        output_row += [prf['P'], prf['R'], prf['F']]
                    output.append(output_row)
                    is_header_created = True
            result = [output_header] + [output_sub_header] + output
            create_csv(fname, result, directory='part9')
# Module-level entry point: the whole step runs as soon as this file is
# executed (and also whenever it is imported).
loop_thru_step8()
Loading…
Cancel
Save