|
|
|
|
#!/usr/bin/env python
|
|
|
|
|
# -*- coding:utf-8 -*-
|
|
|
|
|
'''
|
|
|
|
|
Step 7
|
|
|
|
|
'''
|
|
|
|
|
from __future__ import print_function
|
|
|
|
|
from base import create_csv, TARGET_DIR, MODEL_LIST
|
|
|
|
|
import os
|
|
|
|
|
import csv
|
|
|
|
|
import random
|
|
|
|
|
|
|
|
|
|
'''
|
|
|
|
|
แก้ตรงนี้ไปก่อนละกัน No of Tree = “4Ts” และ No of R = “5”
|
|
|
|
|
|
|
|
|
|
No of Tree ของคุณ ==== NoT_MATCH_ARG เป็น list หรือ tuple
|
|
|
|
|
- ถ้าจะเอาอันไหนก็ใส่ใน list ไป เช่น เอา 4Ts, 2Ts >> ['4Ts', '2Ts']
|
|
|
|
|
- ถ้าเอาทุกอันจะใส่ ['4Ts', '3Ts', ... ] ให้ครบ หรือใช้ [] ก็ได้
|
|
|
|
|
No of R ของคุณ ======= TOTAL -- ถ้าต้องการทั้งหมดก็ = 0
|
|
|
|
|
แต่ว่า ผมไม่รับรองว่าจะใช้ได้ทั้งหมดเพราะว่า มันก็ขึ้นกับหลายอย่าง
|
|
|
|
|
เช่น 1Ts มีไม่กี่อันอาจจะ error มั้ง? โดยเฉพาะเวลา random
|
|
|
|
|
'''
|
|
|
|
|
# "No of Tree" values to keep. get_by_order() and get_by_random() skip every
# row whose third column (r[2]) is not listed here. An empty list or tuple
# disables the filter, so every value is accepted.
NoT_MATCH_ARG = ['2Ts', '1Ts']
|
|
|
|
|
# Number of rows to select per model. A value below 1 (e.g. 0) means
# "no limit": every matching row is taken.
TOTAL = 10
|
|
|
|
|
|
|
|
|
|
# Directory holding the '<model>-6.csv' input files read by this step.
SRC_PATH = os.path.join(TARGET_DIR, 'part6')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_yynn_count(cols):
    """Return the Y/N weight of a row.

    A cell equal to 'Y', 'Y*', 'N' or 'N*' contributes 1, a cell equal to
    'Y/N' or 'Y/N*' contributes 2, and any other cell contributes 0.

    :param cols: iterable of cell values (one CSV row).
    :return: the summed weight as an int.
    """
    single_marks = ('Y', 'Y*', 'N', 'N*')
    double_marks = ('Y/N', 'Y/N*')
    return sum(
        1 if cell in single_marks else (2 if cell in double_marks else 0)
        for cell in cols
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_by_order():
    """Write the first matching rows of each model, in file order.

    For every model in MODEL_LIST, reads '<model>-6.csv' from SRC_PATH and
    keeps rows whose third column is listed in NoT_MATCH_ARG (all rows when
    NoT_MATCH_ARG is empty). Reading stops once TOTAL rows are kept; a TOTAL
    that is not positive means no limit. The kept rows are handed to
    create_csv() as '<model>-1.csv' in directory 'part7'.

    Each output row is: R_id, 'Ord_<no of R>', No of Tree, model, the source
    columns from index 7 onward, and the get_yynn_count() of the source row.
    """
    limit = TOTAL if TOTAL > 0 else None
    for model in MODEL_LIST:
        selected = []
        source = os.path.join(SRC_PATH, '%s-6.csv' % model)
        with open(source, 'rb') as handle:
            for record in csv.reader(handle):
                if NoT_MATCH_ARG and record[2] not in NoT_MATCH_ARG:
                    continue
                out = [
                    record[0],             # R_id
                    'Ord_%s' % record[1],  # no of R
                    record[2],             # No of Tree
                    model,                 # model
                ]
                out.extend(record[7:])
                # The count is taken over the whole source row, not `out`.
                out.append(get_yynn_count(record))
                selected.append(out)
                if limit is not None and len(selected) >= limit:
                    break

        create_csv('%s-1.csv' % model, selected, directory='part7')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_by_random():
    """Write a random selection of rows for each model.

    For every model in MODEL_LIST, reads '<model>-6.csv' from SRC_PATH,
    keeps the rows whose No of Tree column is listed in NoT_MATCH_ARG (all
    rows when NoT_MATCH_ARG is empty) and picks up to TOTAL of them at
    random, never picking the same R_id twice. A TOTAL below 1 means every
    eligible row is taken (in random order). The picks are handed to
    create_csv() as '<model>-2.csv' in directory 'part7'.

    Each output row is: R_id, 'Ran_<pick index, from 0>', No of Tree, model,
    the source columns from index 7 onward, and the get_yynn_count() of the
    source row.

    If fewer eligible rows exist than TOTAL, all of them are written. The
    previous retry-until-unique loop never terminated in that situation.
    """
    for m in MODEL_LIST:
        candidates = []
        with open(os.path.join(SRC_PATH, '%s-6.csv' % m), 'rb') as f:
            for r in csv.reader(f):
                row = [
                    r[0],  # R_id
                    '',    # no of R, filled in once the row is picked
                    r[2],  # No of Tree
                    m,     # model
                ] + r[7:]
                row.append(get_yynn_count(r))
                # Filter before sampling so every candidate is pickable.
                if NoT_MATCH_ARG and row[2] not in NoT_MATCH_ARG:
                    continue
                candidates.append(row)

        wanted = len(candidates) if TOTAL < 1 else TOTAL

        # Walking a shuffled list and skipping already-seen R_ids selects
        # rows with the same distribution as retrying random.choice(), but
        # always finishes, even when the pool is smaller than `wanted`.
        random.shuffle(candidates)
        result = []
        seen_ids = set()
        for row in candidates:
            if len(result) >= wanted:
                break
            if row[0] in seen_ids:
                continue
            seen_ids.add(row[0])
            row[1] = 'Ran_%s' % len(result)
            result.append(row)

        output_name = '%s-2.csv' % m
        create_csv(output_name, result, directory='part7')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_by_group(model_name, step, **kwargs):
    """Re-mark a part7 CSV cumulatively, row by row, and save it again.

    Reads 'part7/<model_name>-<step>.csv' under TARGET_DIR. The first row is
    passed through with only its first column replaced by '<prefix>_1'. For
    every later row the last column is dropped, column 0 becomes
    '<prefix>_<row number>', and each cell from index 5 onward is compared
    with the same cell of the previously emitted row (asterisks removed):

    * the cell is kept when it is 'Y/N', equals the previous cell, or the
      previous cell is empty;
    * it becomes '<previous>*' when it is empty or the previous cell is
      'Y/N';
    * otherwise (one 'Y' and one 'N') it becomes 'Y/N*'.

    The get_yynn_count() of the rewritten row is appended, and the result is
    handed to create_csv() as '<model_name>-<suffix>.csv' in 'part7'.

    :param model_name: model name used in the input and output file names.
    :param step: step label of the input file ('<model_name>-<step>.csv').
    :param kwargs: optional 'prefix' (row id prefix, default 'XX') and
        'suffix' (output file suffix, default '3').
    """
    prefix = kwargs.get('prefix', 'XX')
    suffix = kwargs.get('suffix', '3')

    source = os.path.join(TARGET_DIR, 'part7', '%s-%s.csv' % (model_name, step))
    merged = []
    with open(source, 'rb') as handle:
        reader = csv.reader(handle)

        # The first row only gets a new id; it seeds the comparison.
        head = next(reader)
        head[0] = '%s_%s' % (prefix, 1)
        merged.append(head)
        previous = head[:]

        for number, raw in enumerate(reader, 2):
            # The trailing column (the summed value) would otherwise be fed
            # into the Y/N count as well, so it is cut off here.
            current = raw[:-1]
            current[0] = '%s_%s' % (prefix, number)

            for idx, cell in enumerate(current[5:], 5):
                # Ignore asterisks on the previous row's mark when comparing.
                reference = previous[idx].replace('*', '')
                if cell == 'Y/N' or cell == reference or reference == '':
                    continue
                if cell == '' or reference == 'Y/N':
                    current[idx] = '%s*' % reference
                else:  # one Y one N
                    current[idx] = 'Y/N*'

            current.append(get_yynn_count(current))
            merged.append(current)
            previous = current[:]

    create_csv('%s-%s.csv' % (model_name, suffix), merged, directory='part7')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Produce the ordered ('-1') and random ('-2') selections first; the grouping
# pass below reads those files back.
get_by_order()
get_by_random()

# Step '1' output is grouped into '<model>-3.csv', step '2' into '<model>-4.csv'.
for model in MODEL_LIST:
    for step, suffix in (('1', '3'), ('2', '4')):
        get_by_group(model, step, prefix='Rg', suffix=suffix)
|