Python × Twitter API 트위터 유형별로 데이터 가져오기

키워드를 포함한 트윗 수


누적된 상황


import tweepy
import datetime

# Twitter API credentials. They are blank in this listing; fill in the four
# values issued for your own app before running.
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""

# Authenticate with the consumer key pair plus the access token pair.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# The search below pages through every matching tweet with no item cap, so
# the rate limit can be exceeded mid-collection. wait_on_rate_limit makes
# Tweepy sleep until the limit resets instead of raising and losing the run.
api = tweepy.API(auth, wait_on_rate_limit=True)

# Search keyword and the accumulators filled in by the search loop below.
keyword = '五反田'
tweets_data, total_tweets_data = [], []

# Parse the inclusive YYYYMMDD range. untilDate is pushed one day past
# untilForm so that diff counts both endpoint days.
sinceForm, untilForm = '20210428', '20210501'
sinceDate = datetime.datetime.strptime(sinceForm, '%Y%m%d')
untilDate = datetime.datetime.strptime(untilForm, '%Y%m%d')
untilDate += datetime.timedelta(days=1)
diff = (untilDate - sinceDate).days

# One (since[k], until[k]) pair per day: since[k] is midnight of day k and
# until[k] is exactly one day later.
since = [sinceDate + datetime.timedelta(days=offset)
         for offset in range(diff)]
until = [untilDate - datetime.timedelta(days=offset)
         for offset in reversed(range(diff))]

# Run one search per day window and record the running tweet total.
# tweets_data is never cleared between days, so each value appended to
# total_tweets_data is a cumulative count up to and including that day.
# NOTE(review): since/until are passed as datetime objects; confirm the
# request serialises them into the date form the API expects.
for day_start, day_end in zip(since, until):
    day_results = tweepy.Cursor(
        api.search,
        tweet_mode='extended',
        q=keyword,
        lang='ja',
        since=day_start,
        until=day_end,
        # Largest page size the standard search endpoint allows (default is
        # 15); fewer requests are needed to exhaust each window.
        count=100).items()
    # Flatten each tweet to a single line: inner newlines become '。'.
    tweets_data.extend(
        status.full_text.strip().replace('\n', '。') + '\n'
        for status in day_results)
    total_tweets_data.append(len(tweets_data))

# Print the cumulative number of keyword tweets, one entry per day, each
# preceded by a separator line.
for running_total in total_tweets_data:
    print('-------------------------------------------', running_total,
          sep='\n')

실행 결과


Image from Gyazo

당일의 상황


import tweepy
import datetime

# Twitter API credentials. They are blank in this listing; fill in the four
# values issued for your own app before running.
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""

# Authenticate with the consumer key pair plus the access token pair.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# The search below pages through every matching tweet with no item cap, so
# the rate limit can be exceeded mid-collection. wait_on_rate_limit makes
# Tweepy sleep until the limit resets instead of raising and losing the run.
api = tweepy.API(auth, wait_on_rate_limit=True)

# Search keyword and the accumulators filled in further down: collected
# texts, running totals, and the per-day counts derived from the totals.
keyword = '五反田'
tweets_data, total_tweets_data, day_tweets_data = [], [], []

# Parse the inclusive YYYYMMDD range. untilDate is pushed one day past
# untilForm so that diff counts both endpoint days.
sinceForm, untilForm = '20210428', '20210501'
sinceDate = datetime.datetime.strptime(sinceForm, '%Y%m%d')
untilDate = datetime.datetime.strptime(untilForm, '%Y%m%d')
untilDate += datetime.timedelta(days=1)
diff = (untilDate - sinceDate).days

# One (since[k], until[k]) pair per day: since[k] is midnight of day k and
# until[k] is exactly one day later.
since = [sinceDate + datetime.timedelta(days=offset)
         for offset in range(diff)]
until = [untilDate - datetime.timedelta(days=offset)
         for offset in reversed(range(diff))]

# Run one search per day window and record the running tweet total.
# tweets_data is never cleared between days, so each value appended to
# total_tweets_data is a cumulative count up to and including that day.
# NOTE(review): since/until are passed as datetime objects; confirm the
# request serialises them into the date form the API expects.
for day_start, day_end in zip(since, until):
    day_results = tweepy.Cursor(
        api.search,
        tweet_mode='extended',
        q=keyword,
        lang='ja',
        since=day_start,
        until=day_end,
        # Largest page size the standard search endpoint allows (default is
        # 15); fewer requests are needed to exhaust each window.
        count=100).items()
    # Flatten each tweet to a single line: inner newlines become '。'.
    tweets_data.extend(
        status.full_text.strip().replace('\n', '。') + '\n'
        for status in day_results)
    total_tweets_data.append(len(tweets_data))

# Turn the running totals into per-day counts: the first day is its own
# total, every later day is the increase over the previous running total.
previous_total = 0
for running_total in total_tweets_data:
    day_tweets_data.append(running_total - previous_total)
    previous_total = running_total

# Print the per-day number of keyword tweets, each preceded by a separator.
for day_count in day_tweets_data:
    print('-------------------------------------------', day_count,
          sep='\n')

실행 결과


Image from Gyazo

키워드를 포함한 RT 수


누적된 상황


import tweepy
import datetime

# Twitter API credentials. They are blank in this listing; fill in the four
# values issued for your own app before running.
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""

# Authenticate with the consumer key pair plus the access token pair.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# The searches below page through every matching tweet with no item cap, so
# the rate limit can be exceeded mid-collection. wait_on_rate_limit makes
# Tweepy sleep until the limit resets instead of raising and losing the run.
api = tweepy.API(auth, wait_on_rate_limit=True)

# Accumulators for the unfiltered keyword search.
tweets_data, total_tweets_data, day_tweets_data = [], [], []
# Accumulators for the retweet-free search. The naming is flipped relative
# to the lists above: the search loop stores tweet texts in
# total_exclude_RT_data and the running totals in exclude_RT_data.
exclude_RT_data, total_exclude_RT_data, day_exclude_RT_data = [], [], []
# Retweet counts derived from the two series above.
total_RT_data, day_RT_data = [], []

# key is the keyword followed by a space and the exclude:retweets operator.
keyword = '五反田'
key = ' '.join((keyword, "exclude:retweets"))

# Parse the inclusive YYYYMMDD range. untilDate is pushed one day past
# untilForm so that diff counts both endpoint days.
sinceForm, untilForm = '20210428', '20210501'
sinceDate = datetime.datetime.strptime(sinceForm, '%Y%m%d')
untilDate = datetime.datetime.strptime(untilForm, '%Y%m%d')
untilDate += datetime.timedelta(days=1)
diff = (untilDate - sinceDate).days

# One (since[k], until[k]) pair per day: since[k] is midnight of day k and
# until[k] is exactly one day later.
since = [sinceDate + datetime.timedelta(days=offset)
         for offset in range(diff)]
until = [untilDate - datetime.timedelta(days=offset)
         for offset in reversed(range(diff))]

# Pass 1: unfiltered keyword search, one request series per day window.
# Nothing is printed here; the loop only collects texts and running totals.
# tweets_data is never cleared, so total_tweets_data holds cumulative counts.
# NOTE(review): since/until are passed as datetime objects; confirm the
# request serialises them into the date form the API expects.
for day_start, day_end in zip(since, until):
    day_results = tweepy.Cursor(
        api.search,
        tweet_mode='extended',
        q=keyword,
        lang='ja',
        since=day_start,
        until=day_end,
        # Largest page size the standard search endpoint allows (default is
        # 15); fewer requests are needed to exhaust each window.
        count=100).items()
    # Flatten each tweet to a single line: inner newlines become '。'.
    tweets_data.extend(
        status.full_text.strip().replace('\n', '。') + '\n'
        for status in day_results)
    total_tweets_data.append(len(tweets_data))

# Pass 2: same windows, but with the exclude:retweets query in `key`.
# Texts go to total_exclude_RT_data; cumulative counts go to exclude_RT_data.
for day_start, day_end in zip(since, until):
    day_results = tweepy.Cursor(
        api.search,
        tweet_mode='extended',
        q=key,
        lang='ja',
        since=day_start,
        until=day_end,
        count=100).items()
    total_exclude_RT_data.extend(
        status.full_text.strip().replace('\n', '。') + '\n'
        for status in day_results)
    exclude_RT_data.append(len(total_exclude_RT_data))

# Cumulative retweet count per day = cumulative count of all matching tweets
# minus cumulative count of matching tweets with retweets excluded.
total_RT_data = []
for all_count, non_rt_count in zip(total_tweets_data, exclude_RT_data):
    total_RT_data.append(all_count - non_rt_count)

# Print the cumulative retweet count, each entry preceded by a separator.
for running_total in total_RT_data:
    print('-------------------------------------------', running_total,
          sep='\n')

실행 결과


Image from Gyazo

당일의 상황


import tweepy
import datetime

# Twitter API credentials. They are blank in this listing; fill in the four
# values issued for your own app before running.
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""

# Authenticate with the consumer key pair plus the access token pair.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# The searches below page through every matching tweet with no item cap, so
# the rate limit can be exceeded mid-collection. wait_on_rate_limit makes
# Tweepy sleep until the limit resets instead of raising and losing the run.
api = tweepy.API(auth, wait_on_rate_limit=True)

# Accumulators for the unfiltered keyword search.
tweets_data, total_tweets_data, day_tweets_data = [], [], []
# Accumulators for the retweet-free search. The naming is flipped relative
# to the lists above: the search loop stores tweet texts in
# total_exclude_RT_data and the running totals in exclude_RT_data.
exclude_RT_data, total_exclude_RT_data, day_exclude_RT_data = [], [], []
# Retweet counts derived from the two series above.
total_RT_data, day_RT_data = [], []

# key is the keyword followed by a space and the exclude:retweets operator.
keyword = '五反田'
key = ' '.join((keyword, "exclude:retweets"))

# Parse the inclusive YYYYMMDD range. untilDate is pushed one day past
# untilForm so that diff counts both endpoint days.
sinceForm, untilForm = '20210428', '20210501'
sinceDate = datetime.datetime.strptime(sinceForm, '%Y%m%d')
untilDate = datetime.datetime.strptime(untilForm, '%Y%m%d')
untilDate += datetime.timedelta(days=1)
diff = (untilDate - sinceDate).days

# One (since[k], until[k]) pair per day: since[k] is midnight of day k and
# until[k] is exactly one day later.
since = [sinceDate + datetime.timedelta(days=offset)
         for offset in range(diff)]
until = [untilDate - datetime.timedelta(days=offset)
         for offset in reversed(range(diff))]

# Pass 1: unfiltered keyword search, one request series per day window.
# Nothing is printed here; the loop only collects texts and running totals.
# tweets_data is never cleared, so total_tweets_data holds cumulative counts.
# NOTE(review): since/until are passed as datetime objects; confirm the
# request serialises them into the date form the API expects.
for day_start, day_end in zip(since, until):
    day_results = tweepy.Cursor(
        api.search,
        tweet_mode='extended',
        q=keyword,
        lang='ja',
        since=day_start,
        until=day_end,
        # Largest page size the standard search endpoint allows (default is
        # 15); fewer requests are needed to exhaust each window.
        count=100).items()
    # Flatten each tweet to a single line: inner newlines become '。'.
    tweets_data.extend(
        status.full_text.strip().replace('\n', '。') + '\n'
        for status in day_results)
    total_tweets_data.append(len(tweets_data))

# Pass 2: same windows, but with the exclude:retweets query in `key`.
# Texts go to total_exclude_RT_data; cumulative counts go to exclude_RT_data.
for day_start, day_end in zip(since, until):
    day_results = tweepy.Cursor(
        api.search,
        tweet_mode='extended',
        q=key,
        lang='ja',
        since=day_start,
        until=day_end,
        count=100).items()
    total_exclude_RT_data.extend(
        status.full_text.strip().replace('\n', '。') + '\n'
        for status in day_results)
    exclude_RT_data.append(len(total_exclude_RT_data))

# Cumulative retweet count per day = cumulative count of all matching tweets
# minus cumulative count of matching tweets with retweets excluded.
total_RT_data = []
for all_count, non_rt_count in zip(total_tweets_data, exclude_RT_data):
    total_RT_data.append(all_count - non_rt_count)

# Turn the running totals into per-day retweet counts: the first day is its
# own total, every later day is the increase over the previous total.
previous_total = 0
for running_total in total_RT_data:
    day_RT_data.append(running_total - previous_total)
    previous_total = running_total

# Print the per-day retweet count, each entry preceded by a separator.
for day_count in day_RT_data:
    print('-------------------------------------------', day_count,
          sep='\n')

실행 결과


Image from Gyazo

키워드를 포함한 인용 수


누적된 상황


import tweepy
import datetime

# Twitter API credentials. They are blank in this listing; fill in the four
# values issued for your own app before running.
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""

# Authenticate with the consumer key pair plus the access token pair.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# The search below pages through every matching tweet with no item cap, so
# the rate limit can be exceeded mid-collection. wait_on_rate_limit makes
# Tweepy sleep until the limit resets instead of raising and losing the run.
api = tweepy.API(auth, wait_on_rate_limit=True)

# Accumulators carried over from the tweet and retweet listings.
tweets_data, total_tweets_data, day_tweets_data = [], [], []
exclude_RT_data, total_exclude_RT_data, day_exclude_RT_data = [], [], []
total_RT_data, day_RT_data = [], []
# Quote accumulators: the search loop stores one is_quote_status flag per
# tweet in quote_data and the running count of True flags in
# total_quote_data.
quote_data, total_quote_data, day_quote_data = [], [], []

# key is the keyword followed by a space and the exclude:retweets operator.
keyword = '五反田'
key = ' '.join((keyword, "exclude:retweets"))

# Parse the inclusive YYYYMMDD range. untilDate is pushed one day past
# untilForm so that diff counts both endpoint days.
sinceForm, untilForm = '20210428', '20210501'
sinceDate = datetime.datetime.strptime(sinceForm, '%Y%m%d')
untilDate = datetime.datetime.strptime(untilForm, '%Y%m%d')
untilDate += datetime.timedelta(days=1)
diff = (untilDate - sinceDate).days

# One (since[k], until[k]) pair per day: since[k] is midnight of day k and
# until[k] is exactly one day later.
since = [sinceDate + datetime.timedelta(days=offset)
         for offset in range(diff)]
until = [untilDate - datetime.timedelta(days=offset)
         for offset in reversed(range(diff))]

# Run one search per day window and record the running number of quote
# tweets. Nothing is printed here. quote_data is never cleared between
# days, so counting its True entries after each day gives a cumulative total.
# NOTE(review): since/until are passed as datetime objects; confirm the
# request serialises them into the date form the API expects.
for day_start, day_end in zip(since, until):
    day_results = tweepy.Cursor(
        api.search,
        tweet_mode='extended',
        q=keyword,
        lang='ja',
        since=day_start,
        until=day_end,
        # Largest page size the standard search endpoint allows (default is
        # 15); fewer requests are needed to exhaust each window.
        count=100).items()
    quote_data.extend(status.is_quote_status for status in day_results)
    total_quote_data.append(quote_data.count(True))

# Print the cumulative number of quote tweets, one entry per day, each
# preceded by a separator line.
for running_total in total_quote_data:
    print('-------------------------------------------', running_total,
          sep='\n')

실행 결과


Image from Gyazo

당일의 상황


import tweepy
import datetime

# Twitter API credentials. They are blank in this listing; fill in the four
# values issued for your own app before running.
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""

# Authenticate with the consumer key pair plus the access token pair.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# The search below pages through every matching tweet with no item cap, so
# the rate limit can be exceeded mid-collection. wait_on_rate_limit makes
# Tweepy sleep until the limit resets instead of raising and losing the run.
api = tweepy.API(auth, wait_on_rate_limit=True)

# Accumulators carried over from the tweet and retweet listings.
tweets_data, total_tweets_data, day_tweets_data = [], [], []
exclude_RT_data, total_exclude_RT_data, day_exclude_RT_data = [], [], []
total_RT_data, day_RT_data = [], []
# Quote accumulators: the search loop stores one is_quote_status flag per
# tweet in quote_data and the running count of True flags in
# total_quote_data; day_quote_data receives the per-day differences.
quote_data, total_quote_data, day_quote_data = [], [], []

# key is the keyword followed by a space and the exclude:retweets operator.
keyword = '五反田'
key = ' '.join((keyword, "exclude:retweets"))

# Parse the inclusive YYYYMMDD range. untilDate is pushed one day past
# untilForm so that diff counts both endpoint days.
sinceForm, untilForm = '20210428', '20210501'
sinceDate = datetime.datetime.strptime(sinceForm, '%Y%m%d')
untilDate = datetime.datetime.strptime(untilForm, '%Y%m%d')
untilDate += datetime.timedelta(days=1)
diff = (untilDate - sinceDate).days

# One (since[k], until[k]) pair per day: since[k] is midnight of day k and
# until[k] is exactly one day later.
since = [sinceDate + datetime.timedelta(days=offset)
         for offset in range(diff)]
until = [untilDate - datetime.timedelta(days=offset)
         for offset in reversed(range(diff))]

# Run one search per day window and record the running number of quote
# tweets. Nothing is printed here. quote_data is never cleared between
# days, so counting its True entries after each day gives a cumulative total.
# NOTE(review): since/until are passed as datetime objects; confirm the
# request serialises them into the date form the API expects.
for day_start, day_end in zip(since, until):
    day_results = tweepy.Cursor(
        api.search,
        tweet_mode='extended',
        q=keyword,
        lang='ja',
        since=day_start,
        until=day_end,
        # Largest page size the standard search endpoint allows (default is
        # 15); fewer requests are needed to exhaust each window.
        count=100).items()
    quote_data.extend(status.is_quote_status for status in day_results)
    total_quote_data.append(quote_data.count(True))

# Turn the running totals into per-day quote counts: the first day is its
# own total, every later day is the increase over the previous total.
previous_total = 0
for running_total in total_quote_data:
    day_quote_data.append(running_total - previous_total)
    previous_total = running_total

# Print the per-day number of quote tweets, each preceded by a separator.
for day_count in day_quote_data:
    print('-------------------------------------------', day_count,
          sep='\n')

실행 결과


Image from Gyazo

참고 문헌

  • 트위터의 서치 API 사용법이 혼란스러우니 요약하자면
  • Twitter Rest API에서 얻을 수 있는 데이터 요약
  • Django+Twitter API를 통한 어플리케이션 만들기
  • Django에 Plotly 차트 표시
  • Tweepy에서 트위터를 검색하면 좋네요.
  • [Tweepy] 트위터 API에서 데이터를 수집하는 데 오류가 있으면...
  • Python의 matplotlib로 도표를 만들었어요.
  • Django에서 Plotly를 사용한 차트
  • [Python] Plotly로 돌리는 그래프 만들기
  • Plotly를 사용하여 폴리라인 및 복합 차트
  • 【Python×Twitter API】 Twitter 클라이언트를 만들기 전의 길 [API에서 장난치기]
  • 좋은 웹페이지 즐겨찾기