|
|
#!/usr/bin/env python |
|
|
# -*- coding:utf-8 -*- |
|
|
''' |
|
|
Step 7 |
|
|
''' |
|
|
from __future__ import print_function |
|
|
from base import create_csv, TARGET_DIR, MODEL_LIST |
|
|
import os |
|
|
import csv |
|
|
import random |
|
|
|
|
|
'''
Edit the settings here for now, e.g. No of Tree = "4Ts" and No of R = "5".

Your "No of Tree" ==== NoT_MATCH_ARG, a list or tuple
- Put whichever values you want in the list, e.g. for 4Ts and 2Ts >> ['4Ts', '2Ts']
- To take every value, either list them all ['4Ts', '3Ts', ... ] or just use []

Your "No of R" ======= TOTAL -- set it to 0 if you want all of them

Note: I cannot guarantee that every combination works, because it depends on
several things. For example, there are only a few 1Ts rows, so it might
fail, especially when picking at random.
'''
|
|
# "No of Tree" labels to keep; each source row's column 2 is tested against
# this list. An empty list disables the filter.
NoT_MATCH_ARG = ['2Ts', '1Ts']

# Maximum number of rows to keep per model; a value below 1 means no limit.
TOTAL = 10

# Directory that holds the '<model>-6.csv' input files.
SRC_PATH = os.path.join(TARGET_DIR, 'part6')
|
|
|
|
|
|
|
|
def get_yynn_count(cols):
    """Return the Y/N score of a row.

    Cells equal to 'Y', 'Y*', 'N' or 'N*' count as 1, cells equal to
    'Y/N' or 'Y/N*' count as 2, and every other cell counts as 0.
    """
    single = ('Y', 'Y*', 'N', 'N*')
    double = ('Y/N', 'Y/N*')
    return sum(
        1 if cell in single else 2 if cell in double else 0
        for cell in cols
    )
|
|
|
|
|
|
|
|
def get_by_order():
    """Write the first matching rows of each model to part7/<model>-1.csv.

    For every model in MODEL_LIST the file '<model>-6.csv' under SRC_PATH is
    read in file order. Rows whose column 2 is not listed in NoT_MATCH_ARG
    are skipped (no filtering when NoT_MATCH_ARG is empty), and reading stops
    once TOTAL rows were kept (no limit when TOTAL is not positive).
    """
    for model in MODEL_LIST:
        source = os.path.join(SRC_PATH, '%s-6.csv' % model)
        selected = []
        with open(source, 'rb') as handle:
            for rec in csv.reader(handle):
                if NoT_MATCH_ARG and rec[2] not in NoT_MATCH_ARG:
                    continue

                out = [
                    rec[0],             # R_id
                    'Ord_%s' % rec[1],  # no of R
                    rec[2],             # No of Tree
                    model,              # model
                ]
                out.extend(rec[7:])
                # Trailing column: Y/N score computed over the whole source row.
                out.append(get_yynn_count(rec))
                selected.append(out)

                if TOTAL > 0 and len(selected) >= TOTAL:
                    break

        create_csv('%s-1.csv' % model, selected, directory='part7')
|
|
|
|
|
|
|
|
def get_by_random():
    """Write a random selection of rows of each model to part7/<model>-2.csv.

    For every model in MODEL_LIST the file '<model>-6.csv' under SRC_PATH is
    read. Rows whose column 2 is listed in NoT_MATCH_ARG (all rows when it is
    empty) are eligible. Up to TOTAL eligible rows with distinct R_id are
    drawn in random order (all of them when TOTAL is below 1) and labelled
    'Ran_0', 'Ran_1', ... in draw order.

    If fewer eligible rows exist than requested, every eligible row is
    written instead of retrying forever.
    """
    for m in MODEL_LIST:
        candidates = []
        with open(os.path.join(SRC_PATH, '%s-6.csv' % m), 'rb') as f:
            for r in csv.reader(f):
                # Filter while reading so the draw only ever sees eligible
                # rows; drawing first and rejecting afterwards cannot
                # terminate when too few rows match.
                if NoT_MATCH_ARG and r[2] not in NoT_MATCH_ARG:
                    continue
                row = [
                    r[0],  # R_id
                    '',    # no of R, filled in once the row is drawn
                    r[2],  # No of Tree
                    m,     # model
                ] + r[7:]
                # Trailing column: Y/N score computed over the whole source row.
                row.append(get_yynn_count(r))
                candidates.append(row)

        wanted = len(candidates) if TOTAL < 1 else TOTAL

        # A shuffle followed by a prefix scan is a draw without replacement
        # that is guaranteed to finish.
        random.shuffle(candidates)

        result = []
        seen_ids = set()
        for row in candidates:
            if len(result) >= wanted:
                break
            if row[0] in seen_ids:
                # Keep R_id unique in the output.
                continue
            seen_ids.add(row[0])
            row[1] = 'Ran_%s' % len(result)
            result.append(row)

        output_name = '%s-2.csv' % m
        create_csv(output_name, result, directory='part7')
|
|
|
|
|
|
|
|
def _calibrate_cell(cell, reference):
    """Merge one cell with the asterisk-free cell of the previous row."""
    if cell == 'Y/N' or cell == reference or reference == '':
        return cell
    if cell == '' or reference == 'Y/N':
        # Value carried over from the previous row; '*' marks it as inherited.
        return '%s*' % reference
    # One row says Y and the other says N.
    return 'Y/N*'


def get_by_group(model_name, step, **kwargs):
    """Accumulate the Y/N columns of a part7 step file row by row.

    Reads part7/<model_name>-<step>.csv and writes
    part7/<model_name>-<suffix>.csv. The first row is copied with only its
    id replaced. Every later row has its last column dropped, its columns
    from index 5 onwards merged with the previous output row, and a fresh
    Y/N score appended. Row ids become '<prefix>_<n>', counting from 1.

    Keyword arguments:
        prefix -- id prefix for the output rows (default 'XX').
        suffix -- suffix of the output file name (default '3').

    An empty input file yields an empty output instead of raising
    StopIteration.
    """
    new_prefix = kwargs.get('prefix', 'XX')
    output_suffix = kwargs.get('suffix', '3')

    result = []
    count = 1
    fpath = os.path.join(TARGET_DIR, 'part7', '%s-%s.csv' % (model_name, step))
    with open(fpath, 'rb') as f:
        rows = csv.reader(f)
        first = next(rows, None)
        if first is not None:
            # The first row is passed through unchanged apart from its id.
            first[0] = '%s_%s' % (new_prefix, count)
            result.append(first)

            calibrator = first[:]
            for r in rows:
                count += 1
                # The input's last column is the previous step's score; drop
                # it so it is not treated as a Y/N cell.
                _r = r[:-1]
                _r[0] = '%s_%s' % (new_prefix, count)
                for i in range(5, len(_r)):
                    # Compare against the previous row with asterisks removed.
                    reference = calibrator[i].replace('*', '')
                    _r[i] = _calibrate_cell(_r[i], reference)
                # Trailing column: Y/N score of the merged row.
                _r.append(get_yynn_count(_r))
                result.append(_r)
                calibrator = _r[:]

    output_name = '%s-%s.csv' % (model_name, output_suffix)
    create_csv(output_name, result, directory='part7')
|
|
|
|
|
|
|
|
# Build the ordered ('-1') and random ('-2') selections for every model.
get_by_order()
get_by_random()

# Derive the grouped files: step '1' is written as '-3', step '2' as '-4'.
for m in MODEL_LIST:
    for s, _suffix in (('1', '3'), ('2', '4')):
        get_by_group(m, s, prefix='Rg', suffix=_suffix)
|
|
|
|