학습 내용

어제 복습

Python으로 이미지 분석을 하기 위해 접속키(subscription_key)와 접속할 수 있는 url(vision_base_url)이 필요하다.

analyze_url = vision_base_url + 'analyze'

가져온 url 뒤에 analyze 를 붙이면 분석하겠다는 의미이다.

# Fetch the image with a GET request; the raw bytes end up in response.content.
response = requests.get(image_url)
# Wrap the bytes in an in-memory stream so Image.open can read them like a file.
image = Image.open(BytesIO(response.content))

이미지데이터를 Get 요청하고 응답을 response에 저장한다.
response.content에 담긴 이미지의 바이트를 BytesIO로 읽은 후 이미지를 연다.

# Image Analysis

사용한 이미지

# Request header carrying the subscription key.
headers = {'Ocp-Apim-Subscription-Key' : subscription_key }
# Visual features to request from the analyze operation.
# NOTE: this variable is spelled `prams`; the requests.post call that follows
# passes the same name, so the two must be renamed together if at all.
prams = { 'visualFeatures' : 'Categories,Description,Color'}
# Request body: the URL of the image to analyze.
data = {'url':image_url}

Categories, Description, Color 값을 가져온다

response = requests.post(analyze_url, headers=headers, params=prams,  json=data)

결과를 받기 위해 변수를 하나 지정하고 요청한다.

analysis = response.json()

결과를 JSON 방식으로 받아온다.

결과 해석

#categories 이미지를 분류
{'categories': [{'name': 'outdoor_', #밖이다
   'score': 0.12890625, #정확도
   'detail': {'landmarks': []}}], #랜드마크가 있으면 여기에 적힘
#color 색 분석
 'color': {'dominantColorForeground': 'Black', #전체적인 컬러
  'dominantColorBackground': 'Black', #배경색
  'dominantColors': [],
  'accentColor': '2B63A0',
  'isBwImg': False,
  'isBWImg': False},
#description tags 이미지에서 찾을 수 있는 요소들을 tag해놓았다
 'description': {'tags': ['water',
   'bird',
   'lake',
   'table',
   'sitting',
   'body',
   'small',
   'pond',
   'standing',
   'pier',
   'dock',
   'large',
   'row',
   'boat',
   'swimming',
   'group',
   'ocean',
   'blue',
   'umbrella',
   'flying'],
#captions ai가 이미지를 한 줄로 설명한 것
  'captions': [{'text': 'a bird sitting on top of a body of water',
    'confidence': 0.7277893270684617}]},
 'requestId': '38c3f8a7-7217-4d97-9d4f-9a3cb9f1a862',
 'metadata': {'height': 801, 'width': 1200, 'format': 'Jpeg'}}

JSON
인터넷으로 데이터를 주고받을 때 많이 사용되는 방식이다.
가볍고 명확하기 때문에 지금은 표준처럼 사용된다.
계층적으로 구성되어 있으며 여러 개의 속성을 콤마로 구분한다.
Python의 딕셔너리 타입과 맞아떨어진다.
caption의 text 가져오기

analysis['description']['captions'][0]['text']

'a bird sitting on top of a body of water'

analysis 변수 안에 들어 있는 description 항목 안에 있는 captions의 0번째 항목의 text의 값을 가져온다.

# Object Detection

특정한 요소들이 어떤 위치에 있는지 찾아낸다

# Append 'detect' to the Vision base URL to select the Object Detection operation.
objectdetection_url = vision_base_url + 'detect'

# Request header carrying the subscription key.
headers = {'Ocp-Apim-Subscription-Key': subscription_key}
# Request body: the URL of the image to analyze.
data = {'url': object_image}
# Send the POST request. The URL variable must be spelled exactly as it was
# defined above (Python names are case-sensitive).
response = requests.post(objectdetection_url,
                         headers=headers,
                         json=data)
detectionResult = response.json()
# Bare expression: displays the parsed result when run as a notebook cell.
detectionResult

결과 해석
taxi, person, bus를 감지한 것을 확인할 수 있다.

사각형 박스 그리기

# Pull the list of detected objects out of the detection result.
objects = detectionResult['objects']
# Visit each detected object in turn.
for obj in objects:
    print(obj)
    # Bounding box of this object: top-left corner plus width and height.
    rect = obj['rectangle']
    x, y, w, h = rect['x'], rect['y'], rect['w'], rect['h']

    # Draw the box outline.
    # rectangle((first corner, opposite corner), outline=color)
    top_left = (x, y)
    bottom_right = (x + w, y + h)
    draw.rectangle((top_left, bottom_right), outline='red')

텍스트 추가

	#글자를 입력하는 코드
	objectName = obj['object']
	draw.text((x,y),objectName, fill='red')
	#draw.text(좌표, 오브젝트 이름, 색 지정)

# Face Detection 얼굴 인식

사람의 얼굴을 인식

사진 속에서 정확한 얼굴의 좌표를 파악할 수 있다.
나이, 성별, 웃는 정도, 수염, 감정, 머리 각도 등 다양한 얼굴의 특성을 확인한다.

Face API를 사용하기 위해 새로운 주소를 생성해야 한다.

# Set the subscription key. The variable name must be a single identifier
# (no space) and must match the name used in the headers below.
subscription_key = 'MY_ACCESS_KEY'
# Service address for face detection.
faceDetection_url = 'MY_COMPUTER_VISION_ENDPOINT/face/v1.0/detect'
# Path segments: the face service, API version 1.0, and the detect operation.

# Request header carrying the subscription key.
headers = {'Ocp-Apim-Subscription-key': subscription_key}
# Ask for the face ID plus the age and gender attributes.
# NOTE(review): the Face API reference spells the first parameter
# `returnFaceId` - confirm whether the service accepts this casing.
params = {
    'returnFaceID': 'true',
    'returnFaceAttributes': 'age,gender'
}
# Request body: the URL of the image to analyze.
data = {'url': image_url}
response = requests.post(faceDetection_url,
                         headers=headers,
                         params=params,
                         json=data)
# Parsed JSON response; the drawing code iterates over it, one entry per face.
faces = response.json()

결과 해석
faceId : 유일한 값을 나타내는 난수
faceRectangle: 위치를 나타냄
faceAttributes: 얼굴의 속성값

배열의 요소를 하나씩 확인한다

# Get a drawing context so the image can be modified.
draw = ImageDraw.Draw(img)

# Outline every detected face and label it with gender and age.
for face in faces:
    box = face['faceRectangle']
    left, top = box['left'], box['top']
    width, height = box['width'], box['height']

    top_left = (left, top)
    bottom_right = (left + width, top + height)
    draw.rectangle((top_left, bottom_right), outline='red')

    # Read the face attributes; the values are converted to str so they can be
    # concatenated into the label text.
    face_info = face['faceAttributes']
    age = str(face_info['age'])
    gender = str(face_info['gender'])
    result = 'Gender:' + gender + ' Age:' + age

    draw.text(top_left, result, fill='red')

# Emotion Recognition 감정 인식

# Add the emotion attribute so the response includes emotion data.
headers = {'Ocp-Apim-Subscription-key': subscription_key}
# NOTE(review): the Face API reference spells the first parameter
# `returnFaceId` - confirm whether the service accepts this casing.
params = {
    'returnFaceID': 'true',
    'returnFaceAttributes': 'age,gender,emotion'
}
# 'emotion' is the newly added attribute.

	# Loop-body excerpt: read the happiness value from the emotion entry inside faceAttributes.
    face_info = face['faceAttributes']
    emotion = face_info['emotion']
    happiness = emotion['happiness']
    gender = str(face_info['gender'])
    # The happiness value is multiplied by 100 for display (presumably a 0-1 score shown as a percentage - confirm).
    result = 'Gender:'+ gender + ' happiness:' + str(happiness*100)

해당 코드만 다시 실행하면 이미 그려진 텍스트 위에 새 텍스트가 겹쳐 그려지므로, 위의 코드에서 이미지를 불러오는 부분부터 다시 실행해야 제대로 그려진다.

# OCR 이미지 분석

# OCR uses the same key as the Vision API.
subscription_key = 'YOUR_ACCESS_KEY'
# It also uses the Vision API endpoint. The path segment is 'vision', and the
# trailing slash is required because the operation name is appended directly
# to this base URL.
vision_base_url = 'YOUR_COMPUTER_VISION_ENDPOINT/vision/v2.0/'
# Layout: endpoint/API name/version/

# Append 'ocr' to select the OCR operation.
ocr_url = vision_base_url + 'ocr'
# Image to analyze.
image_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/a/af/Atomist_quote_from_Democritus.png/338px-Atomist_quote_from_Democritus.png'
# Download the image and display it.
img = Image.open(BytesIO(requests.get(image_url).content))
img

# 'language': 'unk' - language left unspecified; the service works out which language it is.
# 'detectOrientation': 'true' - ask the service to detect the text orientation.
params = {'language': 'unk', 'detectOrientation': 'true'}

결과 해석

{'language': 'en',	<-- 언어는 영어였다
 'textAngle': 0.0,	<-- 기울어져있지 않다
 'orientation': 'Up',
 'regions': [{'boundingBox': '21,16,304,451',
   'lines': [{'boundingBox': '28,16,288,41',
     'words': [{'boundingBox': '28,16,288,41', 'text': 'NOTHING'}]}, <-- 텍스트 NOTHING의 좌표
    {'boundingBox': '27,66,283,52',
     'words': [{'boundingBox': '27,66,283,52', 'text': 'EXISTS'}]},
    {'boundingBox': '27,128,292,49',
     'words': [{'boundingBox': '27,128,292,49', 'text': 'EXCEPT'}]},
    {'boundingBox': '24,188,292,54',
     'words': [{'boundingBox': '24,188,292,54', 'text': 'ATOMS'}]},
    {'boundingBox': '22,253,297,32',
     'words': [{'boundingBox': '22,253,105,32', 'text': 'AND'},
      {'boundingBox': '144,253,175,32', 'text': 'EMPTY'}]},
    {'boundingBox': '21,298,304,60',
     'words': [{'boundingBox': '21,298,304,60', 'text': 'SPACE.'}]},
    {'boundingBox': '26,387,294,37',
     'words': [{'boundingBox': '26,387,210,37', 'text': 'Everything'},
      {'boundingBox': '249,389,71,27', 'text': 'else'}]},
    {'boundingBox': '127,431,198,36',
     'words': [{'boundingBox': '127,431,31,29', 'text': 'is'},
      {'boundingBox': '172,431,153,36', 'text': 'opinion.'}]}]}]}

region : 텍스트가 있는 단위
복잡한 문서의 경우 region이 여러 개이다

# Collect the "lines" list of every region in the OCR result.
line_infos = [region["lines"] for region in analysis["regions"]]

# Flatten regions -> lines -> words into one list of word entries.
word_infos = []
for region_lines in line_infos:
    for text_line in region_lines:
        word_infos.extend(text_line["words"])

# Bare expression: displays the collected words when run as a notebook cell.
word_infos

학습 후기

지난 시간에 python의 딕셔너리, 리스트, 객체, 그리고 각 요소를 for문으로 확인해본 것이 이번 시간에 유용하게 사용되었고 json을 이해하는데 도움되었다.
오늘 수업은 AI로 어떤 일을 할 수 있는지 실제로 경험해보는 시간이었다. 이미지들에서 오브젝트를 인식해 여러 정보를 분석해 보는 게 신기하면서도 재밌었다.
몇 년 전 유행했던 닮은꼴 연예인 찾아주는 사이트와 얼굴 나이 테스트에 이런 기술을 사용했다니, 당시에는 재밌는 유행이라고만 생각했는데 굉장한 서비스를 사용한 것이었다.
재밌는 것과는 별개로 정리하면서 공부를 추가로 더 해야겠다고 뼈저리게 느껴 남는 시간에 틈틈이 공부할 생각이다.

Author And Source

이 문제에 관하여(AI - Cognitive Services), 우리는 이곳에서 더 많은 자료를 발견하고 링크를 클릭하여 보았다 https://velog.io/@hanss1122/AI-Cognitive-Services

우수한 개발자 콘텐츠 발견에 전념 (Collection and Share based on the CC Protocol.)

AI - Cognitive Services