●
●
●
●
●○○○○
●●●
●●●
●
●
…
-- Hive external table backed by the Kiji `emails` table via the Kiji storage handler.
-- Every Hive column is a STRUCT carrying a `ts` (TIMESTAMP) and a `value` field.
-- NOTE(review): `ts` is presumably the Kiji cell timestamp, and the `[0]` suffix in
-- 'kiji.columns' presumably selects the most recent cell version -- confirm against
-- the Kiji Hive adapter documentation.
-- NOTE(review): the entries in 'kiji.columns' appear to map positionally onto the
-- Hive columns declared below (info:date -> dateLong, info:from -> fromStr, ...).
CREATE EXTERNAL TABLE emails (
    mid      STRUCT<ts: TIMESTAMP, value: STRING>,
    dateLong STRUCT<ts: TIMESTAMP, value: BIGINT>,
    fromStr  STRUCT<ts: TIMESTAMP, value: STRING>,
    toStr    STRUCT<ts: TIMESTAMP, value: STRING>,
    subject  STRUCT<ts: TIMESTAMP, value: STRING>,
    body     STRUCT<ts: TIMESTAMP, value: STRING>
)
STORED BY 'org.kiji.hive.KijiTableStorageHandler'
WITH SERDEPROPERTIES (
    -- Property values must be a single string literal; Hive does not evaluate
    -- expressions (such as `+` concatenation) inside SERDEPROPERTIES.
    'kiji.columns' = 'info:mid[0],info:date[0],info:from[0],info:to[0],info:subject[0],info:body[0]'
)
TBLPROPERTIES (
    'kiji.table.uri' = 'kiji://.env/enron_email/emails'
);
-- Ten sender strings with the most rows in `emails`, highest count first.
-- Only the `value` field of the fromStr struct is used; its `ts` field is ignored.
SELECT
    fromStr.value AS fromStr,
    COUNT(*)      AS count      -- one row per email, so this is emails per sender
FROM
    emails
GROUP BY
    fromStr.value
ORDER BY
    count DESC
LIMIT 10;
-- Ten (sender, recipient) pairs with the most emails, highest count first.
-- toStr.value holds a comma-separated recipient list; it is split on ',' and
-- exploded so that each recipient yields its own row, then trimmed of
-- surrounding whitespace before grouping.
SELECT
    fromStr.value    AS fromStr,
    trim(splitToStr) AS toStr,
    count(1)         AS count
FROM emails
LATERAL VIEW explode(split(toStr.value, ',')) tos AS splitToStr
GROUP BY
    fromStr.value,
    trim(splitToStr)
ORDER BY count DESC
LIMIT 10;
●●
●○
○
User Emails
Emails Table Sentiment
Producer
-- Per-week sentiment statistics (mean, standard deviation, message count) for
-- emails whose sender domain is enron.com.
-- The sender domain is obtained by stripping everything up to and including
-- the last '@' from fromStr.value.
-- weeknum = 52 * (years since 1999) + week-of-year of the dateLong cell timestamp.
-- NOTE(review): weekofyear() near a year boundary can return 1 for late-December
-- dates (or 52/53 for early-January dates), which would place those emails in the
-- wrong bucket -- confirm whether this approximation is acceptable.
-- NOTE(review): this query reads a `sentiment` column; it is assumed to be a
-- STRUCT with a numeric `value` field exposed by the `emails` table definition.
SELECT
    ((year(datelong.ts) - 1999) * 52 + weekofyear(datelong.ts)) AS weeknum,
    avg(sentiment.value)    AS avgsentiment,
    stddev(sentiment.value) AS stddevsentiment,
    count(1)                AS nummessages
FROM emails
WHERE regexp_replace(fromStr.value, '.*@', '') = 'enron.com'
-- The bucket expression is repeated because the SELECT alias is not used in GROUP BY.
GROUP BY ((year(datelong.ts) - 1999) * 52 + weekofyear(datelong.ts));
-- Total sentiment per lower-cased word, most negative total first, over emails
-- whose sender domain is enron.com.
-- The inner query emits one row per word; each row carries the sentiment value
-- of the email the word came from, and the outer query sums those per word.
-- NOTE(review): sentences(body.value)[0] selects only the first sentence of each
-- body, so words from later sentences are not counted -- confirm this is intended.
-- NOTE(review): this query reads a `sentiment` column; it is assumed to be a
-- STRUCT with a numeric `value` field exposed by the `emails` table definition.
SELECT
    lword          AS word,
    sum(sentiment) AS totalsentiment
FROM (
    SELECT
        mid.value       AS mid,
        lower(word)     AS lword,
        sentiment.value AS sentiment
    FROM emails
    LATERAL VIEW explode(sentences(body.value)[0]) wds AS word
    WHERE regexp_replace(fromStr.value, '.*@', '') = 'enron.com'
) subquery
GROUP BY lword
ORDER BY totalsentiment ASC;
●●
●
●
●
●
●