The hiveddl.sql file from the Hortonworks sentiment analysis tutorial
http://hortonworks.com/hadoop-tutorial/how-to-refine-and-visualize-sentiment-data/
ADD JAR json-serde-1.1.6-SNAPSHOT-jar-with-dependencies.jar;
-- create the tweets_raw table containing the records as received from Twitter
CREATE EXTERNAL TABLE tweets_raw (
   id BIGINT,
   created_at STRING,
   source STRING,
   favorited BOOLEAN,
   retweet_count INT,
   retweeted_status STRUCT<
      text:STRING,
      user:STRUCT<screen_name:STRING,name:STRING>>,
   entities STRUCT<
      urls:ARRAY<STRUCT<expanded_url:STRING>>,
      user_mentions:ARRAY<STRUCT<screen_name:STRING,name:STRING>>,
      hashtags:ARRAY<STRUCT<text:STRING>>>,
   text STRING,
   user STRUCT<
      screen_name:STRING,
      name:STRING,
      friends_count:INT,
      followers_count:INT,
      statuses_count:INT,
      verified:BOOLEAN,
      utc_offset:STRING, -- was INT but nulls are strings
      time_zone:STRING>,
   in_reply_to_screen_name STRING,
   year INT,
   month INT,
   day INT,
   hour INT
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
LOCATION '/user/hue/upload/upload/data/tweets_raw'
;
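-- Sanity check, not part of the original script: confirm the JSON SerDe parses
-- the uploaded files; nested STRUCT fields are addressed with dot notation.
-- SELECT id, user.screen_name, text FROM tweets_raw LIMIT 5;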
-- create sentiment dictionary
CREATE EXTERNAL TABLE dictionary (
   type string,
   length int,
   word string,
   pos string,
   stemmed string,
   polarity string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE
LOCATION '/user/hue/upload/upload/data/dictionary';
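-- Optional check, not in the original script: the dictionary is the tab-delimited
-- sentiment lexicon shipped with the tutorial, so each polarity class should be
-- non-empty after the load; NULL columns here point to a delimiter problem.
-- SELECT polarity, count(*) AS words FROM dictionary GROUP BY polarity;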
CREATE EXTERNAL TABLE time_zone_map (
   time_zone string,
   country string,
   notes string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE
LOCATION '/user/hue/upload/upload/data/time_zone_map';
-- Clean up tweets
CREATE VIEW tweets_simple AS
SELECT
   id,
   -- parse created_at ('EEE MMM dd HH:mm:ss Z yyyy'); the tutorial hardcodes the
   -- year to 2013, and 'HH' is used since Twitter timestamps are 24-hour
   cast( from_unixtime( unix_timestamp(concat( '2013 ', substring(created_at,5,15)), 'yyyy MMM dd HH:mm:ss')) as timestamp) ts,
   text,
   user.time_zone
FROM tweets_raw
;
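-- Illustration, not part of the original script: the ts expression applied to a
-- sample created_at value (the literal below is a made-up example).
-- SELECT cast(from_unixtime(unix_timestamp(
--           concat('2013 ', substring('Tue Mar 19 14:10:59 +0000 2013', 5, 15)),
--           'yyyy MMM dd HH:mm:ss')) AS timestamp);
-- -> 2013-03-19 14:10:59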
CREATE VIEW tweets_clean AS
SELECT
   id,
   ts,
   text,
   m.country
FROM tweets_simple t LEFT OUTER JOIN time_zone_map m ON t.time_zone = m.time_zone;
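-- Not in the original script: time_zone is free text, so many tweets will not
-- match the map. count(country) counts only rows where the join succeeded.
-- SELECT count(*) AS total_tweets, count(country) AS with_country FROM tweets_clean;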
-- Compute sentiment
create view l1 as select id, words from tweets_raw lateral view explode(sentences(lower(text))) dummy as words;
create view l2 as select id, word from l1 lateral view explode( words ) dummy as word ;
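-- Aside, not in the original script: sentences() returns array<array<string>>
-- (one inner array of tokens per sentence), which is why two explode passes are
-- needed to get one word per row.
-- SELECT sentences(lower('Iron Man 3 was great. I loved it!'));
-- -> roughly [["iron","man","3","was","great"],["i","loved","it"]]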
-- was: select * from l2 left outer join dict d on l2.word = d.word where polarity = 'negative' limit 10;
create view l3 as select
id,
l2.word,
case d.polarity
when 'negative' then -1
when 'positive' then 1
else 0 end as polarity
from l2 left outer join dictionary d on l2.word = d.word;
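-- Optional check, not in the original script: words missing from the dictionary
-- land in the 0 bucket together with neutral dictionary hits.
-- SELECT polarity, count(*) AS words FROM l3 GROUP BY polarity;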
create table tweets_sentiment stored as orc as select
id,
case
when sum( polarity ) > 0 then 'positive'
when sum( polarity ) < 0 then 'negative'
else 'neutral' end as sentiment
from l3 group by id;
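-- Quick look, not in the original script: per-tweet label distribution.
-- SELECT sentiment, count(*) AS tweets FROM tweets_sentiment GROUP BY sentiment;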
-- put everything back together and re-number sentiment
CREATE TABLE tweetsbi
STORED AS ORC
AS
SELECT
t.*,
case s.sentiment
when 'positive' then 2
when 'neutral' then 1
when 'negative' then 0
end as sentiment
FROM tweets_clean t LEFT OUTER JOIN tweets_sentiment s on t.id = s.id;
-- for Tableau or Excel
-- UDAF sentiscore = sum(sentiment)*50 / count(sentiment)
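-- The same score can also be computed in Hive (a sketch, not part of the
-- original file): with sentiment coded 0/1/2, the score runs 0-100 and 50
-- marks neutral.
-- SELECT country,
--        sum(sentiment) * 50 / count(sentiment) AS sentiscore
-- FROM tweetsbi
-- GROUP BY country;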
-- context n-gram made readable
CREATE TABLE twitter_3grams
STORED AS RCFile
AS
SELECT year, month, day, hour, snippet
FROM
( SELECT
year,
month,
day,
hour,
context_ngrams(sentences(lower(text)), array("iron","man","3",null,null,null), 10) ngs
FROM tweets_raw GROUP BY year, month, day, hour
) base
LATERAL VIEW
explode( ngs ) ngsTab AS snippet -- LATERAL VIEW requires a table alias (ngsTab) even though it is never referenced
;
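-- Rendering the snippets, not part of the original file: context_ngrams emits
-- struct<ngram:array<string>, estfrequency:double> elements after the explode.
-- SELECT concat_ws(' ', snippet.ngram) AS phrase, snippet.estfrequency
-- FROM twitter_3grams
-- LIMIT 10;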