|
|
|
|
|
# query
|
|
|
|
|
|
hive> SELECT name, subordinates[0] FROM employees;
|
|
|
|
|
|
John Doe Mary Smith
|
|
|
Mary Smith Bill King
|
|
|
Todd Jones NULL
|
|
|
|
|
|
# expression
|
|
|
|
|
|
hive> SELECT upper(name), salary, deductions["Federal Taxes"],
|
|
|
round(salary * (1 - deductions["Federal Taxes"])) FROM employees;
|
|
|
|
|
|
SELECT count(*), avg(salary) FROM employees;
|
|
|
|
|
|
# distinct
|
|
|
|
|
|
SELECT count(DISTINCT symbol) FROM stocks;
|
|
|
|
|
|
# limit
|
|
|
|
|
|
hive> SELECT upper(name), salary, deductions["Federal Taxes"],
|
|
|
> round(salary * (1 - deductions["Federal Taxes"])) FROM employees
|
|
|
> LIMIT 2;
|
|
|
|
|
|
JOHN DOE 100000.0 0.2 80000
|
|
|
MARY SMITH 80000.0 0.2 64000
|
|
|
|
|
|
# 列名 别名
|
|
|
|
|
|
hive> SELECT upper(name), salary, deductions["Federal Taxes"] as fed_taxes,
|
|
|
> round(salary * (1 - deductions["Federal Taxes"])) as salary_minus_fed_taxes
|
|
|
> FROM employees LIMIT 2;
|
|
|
|
|
|
# 嵌套select(不可有having在内部select)
|
|
|
|
|
|
hive> FROM (
|
|
|
> SELECT upper(name), salary, deductions["Federal Taxes"] as fed_taxes,
|
|
|
> round(salary * (1 - deductions["Federal Taxes"])) as salary_minus_fed_taxes
|
|
|
> FROM employees
|
|
|
> ) e
|
|
|
> SELECT e.name, e.salary_minus_fed_taxes
|
|
|
> WHERE e.salary_minus_fed_taxes > 70000;
|
|
|
|
|
|
JOHN DOE 100000.0 0.2 80000
|
|
|
|
|
|
# case when then
|
|
|
|
|
|
hive> SELECT name, salary,
|
|
|
> CASE
|
|
|
> WHEN salary < 50000.0 THEN 'low'
|
|
|
> WHEN salary >= 50000.0 AND salary < 70000.0 THEN 'middle'
|
|
|
> WHEN salary >= 70000.0 AND salary < 100000.0 THEN 'high'
|
|
|
> ELSE 'very high'
|
|
|
> END AS bracket FROM employees;
|
|
|
|
|
|
John Doe 100000.0 very high
|
|
|
Mary Smith 80000.0 high
|
|
|
Todd Jones 70000.0 high
|
|
|
Bill King 60000.0 middle
|
|
|
Boss Man 200000.0 very high
|
|
|
|
|
|
# 使用like和rlick
|
|
|
|
|
|
like是如同sql语句
|
|
|
hive> SELECT name, address.street FROM employees WHERE address.street LIKE
|
|
|
'%Chi%';
|
|
|
|
|
|
rlick可以使用如同java的正则
|
|
|
hive> SELECT name, address.street
|
|
|
> FROM employees WHERE address.street RLIKE '.*(Chicago|Ontario).*';
|
|
|
|
|
|
Mary Smith 100 Ontario St.
|
|
|
Todd Jones 200 Chicago Ave.
|
|
|
|
|
|
# group by语句
|
|
|
|
|
|
hive> SELECT year(ymd), avg(price_close) FROM stocks
|
|
|
> WHERE exchange = 'NASDAQ' AND symbol = 'AAPL'
|
|
|
> GROUP BY year(ymd);
|
|
|
|
|
|
1984 25.578625440597534
|
|
|
|
|
|
# having语句
|
|
|
|
|
|
hive> SELECT year(ymd), avg(price_close) FROM stocks
|
|
|
> WHERE exchange = 'NASDAQ' AND symbol = 'AAPL'
|
|
|
> GROUP BY year(ymd)
|
|
|
> HAVING avg(price_close) > 50.0;
|
|
|
|
|
|
1987 53.88968399108163
|
|
|
1991 52.49553383386182
|
|
|
|
|
|
# join inner(建议将最大的table放在最后)
|
|
|
|
|
|
hive> SELECT a.ymd, a.price_close, b.price_close
|
|
|
> FROM stocks a JOIN stocks b ON a.ymd = b.ymd
|
|
|
> WHERE a.symbol = 'AAPL' AND b.symbol = 'IBM';
|
|
|
|
|
|
仅支持等值连接
|
|
|
|
|
|
# left outer join
|
|
|
|
|
|
hive> SELECT s.ymd, s.symbol, s.price_close, d.dividend
|
|
|
> FROM stocks s LEFT OUTER JOIN dividends d ON s.ymd = d.ymd AND s.symbol =
|
|
|
|
|
|
d.symbol
|
|
|
> WHERE s.symbol = 'AAPL';
|
|
|
...
|
|
|
1987-05-01 AAPL 80.0 NULL
|
|
|
1987-05-04 AAPL 79.75 NULL
|
|
|
1987-05-05 AAPL 80.25 NULL |
|
|
\ No newline at end of file |