-- (x - min) / (max - min) 归一化 create temporary macro percentagevalue(value double, min_value double, max_value double) case when value is null or min_value = max_value then 0.5 else (value-min_value)/(max_value-min_value) end; desc user_friend_event; create table train_date row format delimited fields terminated by "," stored as textfile as with pre_tain_data as ( select min(user_age) as user_min_age, max(user_age) as user_max_age, min(user_member_days) as user_min_member_days, max(user_member_days) as user_max_member_days, min(user_friend_count) as user_min_friend_count, max(user_friend_count) as user_max_friend_count, min(user_invited_event_count) as user_min_invited_event_count, max(user_invited_event_count) as user_max_invited_event_count, min(invite_ahead_days) as invited_min_ahead_days, max(invite_ahead_days) as invited_max_ahead_days, min(event_city_level) as event_min_city_level, max(event_city_level) as event_max_city_level, min(event_country_level) as event_min_country_level, max(event_country_level) as event_max_country_level, min(event_lat) as event_min_lat, max(event_lat) as event_max_lat, min(event_lng) as event_min_lng, max(event_lng) as event_max_lng, min(event_start_month) as event_min_start_month, max(event_start_month) as event_max_start_month, min(event_start_dayofweek) as event_min_start_dayofweek, max(event_start_dayofweek) as event_max_start_dayofweek, min(event_start_hour) as event_min_start_hour, max(event_start_hour) as event_max_start_hour, min(start_ahead_days) as start_min_ahead_days, max(start_ahead_days) as start_max_ahead_days, min(invited_friends_count) as invited_min_friends_count, max(invited_friends_count) as invited_max_friends_count, min(maybe_friends_count) as maybe_min_friends_count, max(maybe_friends_count) as maybe_max_friends_count, min(yes_friends_count) as yes_min_friends_count, max(yes_friends_count) as yes_max_friends_count, min(no_friends_count) as no_min_friends_count, max(no_friends_count) as no_max_friends_count, min(user_had_event_count) as user_min_had_event_count, max(user_had_event_count) as user_max_had_event_count, min(user_maybe_attend_event_count) as user_min_maybe_attend_event_count, max(user_maybe_attend_event_count) as user_max_maybe_attend_event_count, min(user_no_attend_event_count) as user_min_no_attend_event_count, max(user_no_attend_event_count) as user_max_no_attend_event_count, min(user_yes_attend_event_count) as user_min_yes_attend_event_count, max(user_yes_attend_event_count) as user_max_yes_attend_event_count, min(user_locale) as user_min_locale, max(user_locale) as user_max_locale, min(user_time_zone) as user_min_time_zone, max(user_time_zone) as user_max_time_zone, min(event_invited_user_count) as event_min_invited_user_count, max(event_invited_user_count) as event_max_invited_user_count, min(event_yes_user_count) as event_min_yes_user_count, max(event_yes_user_count) as event_max_yes_user_count, min(event_no_user_count) as event_min_no_user_count, max(event_no_user_count) as event_max_no_user_count, min(event_maybe_user_count) as event_min_maybe_user_count, max(event_maybe_user_count) as event_max_maybe_user_count from user_friend_event ) select t.userid, t.eventid, t.user_interested, t.user_invited, percentagevalue(t.event_city_level, s.event_min_city_level, s.event_max_city_level) as event_city_level, percentagevalue(t.event_country_level, s.event_min_country_level, s.event_max_country_level) as event_country_level, percentagevalue(t.event_lat, s.event_min_lat, s.event_max_lat) as event_lat, percentagevalue(t.event_lng, s.event_min_lng, s.event_max_lng) as event_lng, percentagevalue(t.invite_ahead_days, s.invited_min_ahead_days, s.invited_max_ahead_days) as invite_ahead_days, percentagevalue(t.event_start_month, s.event_max_start_month, s.event_min_start_month) as event_start_month, percentagevalue(t.event_start_dayofweek, s.event_max_start_dayofweek, s.event_min_start_dayofweek) as event_start_dayofweek, percentagevalue(t.event_start_hour, s.event_max_start_hour, s.event_min_start_hour) as event_start_hour, percentagevalue(t.start_ahead_days, s.start_min_ahead_days, s.start_max_ahead_days) as start_ahead_days, percentagevalue(t.user_friend_count, s.user_min_friend_count, s.user_max_friend_count) as user_friend_count, t.event_creator_is_friend, percentagevalue(t.invited_friends_count, s.invited_min_friends_count, s.invited_max_friends_count) as invited_friends_count, percentagevalue(t.maybe_friends_count, s.maybe_min_friends_count, s.maybe_max_friends_count) as maybe_friends_count, percentagevalue(t.yes_friends_count, s.yes_min_friends_count, s.yes_max_friends_count) as yes_friends_count, percentagevalue(t.no_friends_count, s.no_min_friends_count, s.no_max_friends_count) as no_friends_count, t.invited_friends_percentage, t.yes_friends_percentage, t.no_friends_percentage, t.maybe_friends_percentage, percentagevalue(t.user_age, s.user_min_age, s.user_max_age) as user_age, t.user_gender, percentagevalue(t.user_member_days, s.user_min_member_days, s.user_max_member_days) as user_member_days, t.location_similar, percentagevalue(t.user_had_event_count, s.user_min_had_event_count, s.user_max_had_event_count) as user_had_event_count, percentagevalue(t.user_invited_event_count, s.user_min_invited_event_count, s.user_max_invited_event_count) as user_invited_event_count, percentagevalue(t.user_maybe_attend_event_count, s.user_min_maybe_attend_event_count, s.user_max_maybe_attend_event_count) as user_maybe_attend_event_count, percentagevalue(t.user_no_attend_event_count, s.user_min_no_attend_event_count, s.user_max_no_attend_event_count) as user_no_attend_event_count, percentagevalue(t.user_yes_attend_event_count, s.user_min_yes_attend_event_count, s.user_max_yes_attend_event_count) as user_yes_attend_event_count, percentagevalue(t.user_locale, s.user_min_locale, s.user_max_locale) as user_locale, percentagevalue(t.user_time_zone, s.user_min_time_zone, s.user_max_time_zone) as user_time_zone, percentagevalue(t.event_invited_user_count, s.event_min_invited_user_count, s.event_max_invited_user_count) as event_invited_user_count, percentagevalue(t.event_yes_user_count, s.event_min_yes_user_count, s.event_max_yes_user_count) as event_yes_user_count, percentagevalue(t.event_no_user_count, s.event_min_no_user_count, s.event_max_no_user_count) as event_no_user_count, percentagevalue(t.event_maybe_user_count, s.event_min_maybe_user_count, s.event_max_maybe_user_count) as event_maybe_user_count from dm_attendee.user_friend_event t cross join pre_tain_data s; select count(*) from train_date; select * from user_friend_event where user_locale is null ;
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)