

All ratings are contained in the file "ratings.dat" and are in the
following format:

UserID::MovieID::Rating::Timestamp

- UserIDs range between 1 and 6040
- MovieIDs range between 1 and 3952
- Ratings are made on a 5-star scale (whole-star ratings only)
- Timestamp is represented in seconds since the epoch as returned by time(2)
- Each user has at least 20 ratings

User information is in the file "users.dat" and is in the following
format:

UserID::Gender::Age::Occupation::Zip-code

All demographic information is provided voluntarily by the users and is
not checked for accuracy. Only users who have provided some demographic
information are included in this data set.

- Gender is denoted by a "M" for male and "F" for female
- Age is chosen from the following ranges:

* 1: "Under 18"
* 18: "18-24"
* 25: "25-34"
* 35: "35-44"
* 45: "45-49"
* 50: "50-55"
* 56: "56+"

- Occupation is chosen from the following choices:

* 0: "other" or not specified
* 1: "academic/educator"
* 2: "artist"
* 3: "clerical/admin"
* 4: "college/grad student"
* 5: "customer service"
* 6: "doctor/health care"
* 7: "executive/managerial"
* 8: "farmer"
* 9: "homemaker"
* 10: "K-12 student"
* 11: "lawyer"
* 12: "programmer"
* 13: "retired"
* 14: "sales/marketing"
* 15: "scientist"
* 16: "self-employed"
* 17: "technician/engineer"
* 18: "tradesman/craftsman"
* 19: "unemployed"
* 20: "writer"


Movie information is in the file "movies.dat" and is in the following
format:

MovieID::Title::Genres

- Titles are identical to titles provided by the IMDB (including
year of release)
- Genres are pipe-separated and are selected from the following genres:

* Action
* Adventure
* Animation
* Children's
* Comedy
* Crime
* Documentary
* Drama
* Fantasy
* Film-Noir
* Horror
* Musical
* Mystery
* Romance
* Sci-Fi
* Thriller
* War
* Western




-- Create the working database for the MovieLens tables and switch to it.
-- IF NOT EXISTS keeps the statement from failing when the database is
-- already present (for example when these notes are replayed).
CREATE DATABASE IF NOT EXISTS movies;
USE movies;
-- Early attempts at a users table; no output was recorded for either line.
-- NOTE(review): both separate the column name and type with a colon, unlike
-- the `name type` form used by the working CREATE TABLE statements further
-- down, so these are presumably failed experiments. The full users table
-- is defined later in these notes.
CREATE TABLE users(userid:Long);
create table users(userid:Bigint);
-- Failed attempt, kept verbatim: the column is named `timestamp`, which the
-- parser does not accept as an identifier here (see the ParseException logged
-- right after this statement; its 1:55 offset is where `timestamp` starts in
-- this exact line).
-- NOTE(review): this attempt splits fields on '::', whereas the working
-- statements below use ',' with comma-converted copies of the data files.
CREATE TABLE ratings(userid Int,movieid Int,rating Int,timestamp Timestamp)PARTITIONED BY(dt String) ROW FORMAT DELIMITED FIELDS TERMINATED BY '::';
Error: FAILED: ParseException line 1:55 Failed to recognize predicate 'timestamp'. Failed rule: 'identifier' in column specification


-- Second attempt at the ratings table: the fourth column is renamed to
-- `timestamped` and typed TIMESTAMP, and fields are split on ','.
-- NOTE(review): the sample rows printed after this load show NULL in
-- `timestamped` -- presumably because the file holds epoch seconds rather
-- than a formatted timestamp string; confirm against the Hive type docs.
CREATE TABLE ratings (
    userid      INT,
    movieid     INT,
    rating      INT,
    timestamped TIMESTAMP
)
PARTITIONED BY (dt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

-- Load the comma-separated ratings file into a single dated partition.
LOAD DATA LOCAL INPATH '/home/dyq/Documents/movies/ratings-douhao.dat'
    INTO TABLE ratings
    PARTITION (dt="20161201");
hive> select * from ratings limit 10;
1 1193 5 NULL 20161201
1 661 3 NULL 20161201
1 914 3 NULL 20161201
1 3408 4 NULL 20161201
1 2355 5 NULL 20161201
1 1197 3 NULL 20161201
1 1287 5 NULL 20161201
1 2804 5 NULL 20161201
1 594 4 NULL 20161201
1 919 4 NULL 20161201


-- Rebuild the ratings table with the epoch-seconds field stored as STRING,
-- because the TIMESTAMP-typed version returned NULL for every row.
DROP TABLE ratings;

CREATE TABLE ratings (
    userid      INT,
    movieid     INT,
    rating      INT,
    timestamped STRING
)
PARTITIONED BY (dt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

-- The table was just dropped and recreated, so it is empty: reload the file
-- before querying it (the sample rows shown after this step require it).
LOAD DATA LOCAL INPATH '/home/dyq/Documents/movies/ratings-douhao.dat'
    INTO TABLE ratings
    PARTITION (dt="20161201");

hive> select * from ratings limit 10;
1 1193 5 978300760 20161201
1 661 3 978302109 20161201
1 914 3 978301968 20161201
1 3408 4 978300275 20161201
1 2355 5 978824291 20161201
1 1197 3 978302268 20161201
1 1287 5 978302039 20161201
1 2804 5 978300719 20161201
1 594 4 978302268 20161201
1 919 4 978301368 20161201
Time taken: 0.122 seconds, Fetched: 10 row(s)



-- Movie catalogue: one row per movie, with genres kept as a single string.
-- NOTE(review): fields are split on ','; a title that itself contains a
-- comma would spill into the genres column -- verify the converted file.
CREATE TABLE movies (
    movieid INT,
    title   STRING,
    genres  STRING
)
PARTITIONED BY (dt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

-- Load the comma-separated movies file into a single dated partition.
LOAD DATA LOCAL INPATH '/home/dyq/Documents/movies/movies-douhao.dat'
    INTO TABLE movies
    PARTITION (dt="20161201");

-- Failed attempt, kept verbatim: the unquoted column name zip-code contains a
-- hyphen, which the parser rejects (see the ParseException logged after this
-- statement; its 1:73 offset is the '-' in this exact line). The working
-- definition that follows names the column zipcode instead.
CREATE TABLE users(userid Int,gender String,age Int,occupation String,zip-code String)PARTITIONED BY(dt String) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

FAILED: ParseException line 1:73 cannot recognize input near '-' 'code' 'String' in column type

-- User demographics table. zipcode is a STRING column; the sample output
-- below shows values with leading zeros (e.g. 02460) coming back intact.
CREATE TABLE users (
    userid     INT,
    gender     STRING,
    age        INT,
    occupation STRING,
    zipcode    STRING
)
PARTITIONED BY (dt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

-- Load the comma-separated users file into a single dated partition.
LOAD DATA LOCAL INPATH '/home/dyq/Documents/movies/users-douhao.dat'
    INTO TABLE users
    PARTITION (dt="20161201");

hive> select * from users limit 10;
1 F 1 10 48067 20161201
2 M 56 16 70072 20161201
3 M 25 15 55117 20161201
4 M 45 7 02460 20161201
5 M 25 20 55455 20161201
6 F 50 9 55117 20161201
7 M 35 1 06810 20161201
8 M 25 12 11413 20161201
9 M 25 17 61614 20161201
10 F 35 1 95370 20161201
Time taken: 0.168 seconds, Fetched: 10 row(s)


-- Compact index on ratings.userid: create it, list the table's indexes,
-- then remove it again.
-- NOTE(review): the index is created WITH DEFERRED REBUILD and no
-- ALTER INDEX ... REBUILD appears in these notes, so it presumably stays
-- unpopulated -- confirm that is intended.
CREATE INDEX ratings_userid_index
    ON TABLE ratings (userid)
    AS 'COMPACT'
    WITH DEFERRED REBUILD;

SHOW INDEX ON ratings;

DROP INDEX ratings_userid_index ON ratings;

-- Compact index on ratings.movieid (the join key used by the queries in
-- these notes): create it, list the table's indexes, then remove it again.
-- NOTE(review): the index is created WITH DEFERRED REBUILD and no
-- ALTER INDEX ... REBUILD appears in these notes, so it presumably stays
-- unpopulated -- confirm that is intended.
CREATE INDEX ratings_movieid_index
    ON TABLE ratings (movieid)
    AS 'COMPACT'
    WITH DEFERRED REBUILD;

SHOW INDEX ON ratings;

DROP INDEX ratings_movieid_index ON ratings;

-- Pair every rating with its movie's id and title (no filter).
SELECT
    movies.movieid,
    movies.title,
    ratings.rating
FROM movies
INNER JOIN ratings
    ON movies.movieid = ratings.movieid;
Time taken: 40.721 seconds, Fetched: 1000209 row(s)

Time taken: 40.816 seconds, Fetched: 1000209 row(s)

-- Same join, restricted to the ratings of a single movie (movieid 2716).
SELECT
    movies.movieid,
    movies.title,
    ratings.rating
FROM movies
INNER JOIN ratings
    ON movies.movieid = ratings.movieid
WHERE movies.movieid = 2716;
Time taken: 33.834 seconds, Fetched: 2181 row(s)

-- Make sure neither index exists before the query is timed again.
-- The same DROP INDEX statements already appear earlier in these notes, so
-- IF EXISTS keeps this repeat from depending on the session's
-- "ignore nonexistent" drop setting.
DROP INDEX IF EXISTS ratings_movieid_index ON ratings;
DROP INDEX IF EXISTS ratings_userid_index ON ratings;
-- Re-run of the single-movie join (movieid 2716) after the index drops,
-- for comparison with the earlier timing.
SELECT
    movies.movieid,
    movies.title,
    ratings.rating
FROM movies
INNER JOIN ratings
    ON movies.movieid = ratings.movieid
WHERE movies.movieid = 2716;

Time taken: 29.428 seconds, Fetched: 2181 row(s)


