-
Label Encoding + One Hot Encoding
AI ML/ML · 2021. 1. 21. 00:03
Encoding
In [3]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
import numpy as np
import pandas as pd
In [2]:items = ['폰', '스마트폰', '갤럭시', '아이폰', '애플', '삼성', '애플']
In [6]:
# 먼저 문자열 값을 숫자 값으로 변환하기 위해 LabelEncoder 적용
encoder = LabelEncoder()
encoder.fit(items)
labels = encoder.transform(items)
labels
Out[6]:array([5, 2, 0, 3, 4, 1, 4], dtype=int64)
In [8]:
# 2차원 데이터로 변환
labels = labels.reshape(-1,1)
labels
Out[8]:array([[5], [2], [0], [3], [4], [1], [4]], dtype=int64)
In [9]:
# One-Hot Encoding 적용
oh_encoder = OneHotEncoder()
oh_encoder.fit(labels)
oh_labels = oh_encoder.transform(labels)
oh_labels
Out[9]:<7x6 sparse matrix of type '<class 'numpy.float64'>' with 7 stored elements in Compressed Sparse Row format>
In [12]:print(oh_labels)
  (0, 5)    1.0
  (1, 2)    1.0
  (2, 0)    1.0
  (3, 3)    1.0
  (4, 4)    1.0
  (5, 1)    1.0
  (6, 4)    1.0
In [11]:print(oh_labels.toarray())
[[0. 0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 1. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0.]]
In [13]:df = pd.DataFrame({'items' : ['폰', '스마트폰', '갤럭시', '아이폰', '애플', '삼성', '애플']})
In [14]:pd.get_dummies(df)
Out[14]:
   items_갤럭시  items_삼성  items_스마트폰  items_아이폰  items_애플  items_폰
0          0         0           0          0         0        1
1          0         0           1          0         0        0
2          1         0           0          0         0        0
3          0         0           0          1         0        0
4          0         0           0          0         1        0
5          0         1           0          0         0        0
6          0         0           0          0         1        0

'AI ML > ML' 카테고리의 다른 글
- Cross Validation (0) 2021.01.19
- How to deal with missing data (0) 2020.06.24
- 딥러닝에서 Learning Rate를 최적화하는 방법 (0) 2020.06.23