-
Notifications
You must be signed in to change notification settings - Fork 3
Team recommendation update #50
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
bd370a9
9b3279e
bf39d26
6016e1b
1ca03bc
7a08ec3
750a2c8
7fd2fa1
9b79277
1f82f9d
425e5eb
e39c640
18fc6a5
2ed9c19
8d9c1d4
6156ed5
a46d75e
3de87b8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -48,3 +48,5 @@ Werkzeug==0.16.1 | |
| wrapt==1.12.1 | ||
| wsgi-request-logger==0.4.6 | ||
| zappa==0.51.0 | ||
|
|
||
| fuzzywuzzy~=0.18.0 | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,28 @@ | ||
| from src.flaskapp.db import coll | ||
| from src.flaskapp.util import format_team_object | ||
| from fuzzywuzzy import fuzz | ||
|
|
||
|
|
||
def parse_user_to_string(user):
    """Flatten a user's bio, interests and prizes into one string.

    The result is intended as input for a token-based fuzzy comparison,
    so the interest and prize entries are sorted (to make the output
    independent of their stored order) and every piece is separated by a
    single space so that each entry stays a distinct token.

    Args:
        user: A user document (mapping). The keys read are "bio" (str),
            "interests" (list of str) and "prizes" (list of str); a key
            that is missing or None is treated as empty.

    Returns:
        The space-separated string "<bio> <sorted interests and prizes>".
        If ``user`` is falsy, the tuple
        ``({"message": "Invalid user or user not exist"}, 403)`` is
        returned instead, exactly as before, so existing callers see the
        same value.
    """
    if not user:
        return {"message": "Invalid user or user not exist"}, 403

    bio = user.get("bio") or ""
    interests = user.get("interests") or []
    prizes = user.get("prizes") or []

    # Build a brand-new list. The previous version aliased
    # user["interests"] and then extended/sorted it, which silently
    # rewrote the caller's user document on every call.
    fields = sorted([*interests, *prizes])

    # Join with spaces (and skip empty pieces) so that the bio and each
    # field remain separate tokens instead of being fused into one word.
    return " ".join(part for part in [bio, *fields] if part)
|
|
||
|
|
||
def lv_distance(user, user2):
    """Score how similar two users' profiles are.

    Each user is flattened to a string with ``parse_user_to_string`` and
    the two strings are compared with ``fuzz.token_set_ratio``. Per the
    fuzzywuzzy documentation, that function splits both strings into
    tokens, compares them as sets (so word order and repeated words do
    not matter) and returns an integer from 0 to 100.

    NOTE: despite the "distance" in the name, the value returned is
    fuzzywuzzy's similarity ratio, where a higher number means a closer
    match.

    Args:
        user: The first user document.
        user2: The second user document.

    Returns:
        The value returned by ``fuzz.token_set_ratio`` for the two
        flattened profiles.
    """
    return fuzz.token_set_ratio(
        parse_user_to_string(user),
        parse_user_to_string(user2),
    )
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could we add a comment here briefly explaining what `fuzz.token_set_ratio` does? |
||
|
|
||
|
|
||
| def get_team_recommendations(email): # GET | ||
|
|
@@ -13,127 +36,84 @@ def get_team_recommendations(email): # GET | |
| Return: | ||
| a list of recommended teams to join | ||
| """ | ||
| skills_weight = 1.2 | ||
| seriousness_weight = 1.1 | ||
|
|
||
| user = coll("users").find_one({"_id": email}) | ||
| if not user: | ||
| return {"message": "Invalid user"}, 403 | ||
|
|
||
| # basic info about users | ||
| skills = user["skills"] | ||
| interests = user["interests"] | ||
| prizes = user["prizes"] | ||
| seriousness = user["seriousness"] | ||
|
|
||
| names = set() | ||
| matches = [] | ||
| all_open_teams = coll("teams").aggregate([{"$match": {"complete": False}}]) | ||
| all_open_members = coll("users").aggregate([{"$match": {"team_id": {"$all": [all_open_teams]}}}]) | ||
|
|
||
| team_map = dict() | ||
|
|
||
| # map of distances | ||
| for member in all_open_members: | ||
| team_id = member["team_id"] | ||
| dis = lv_distance(user, member) | ||
| if team_id in team_map: | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this should be `if team_id not in team_map:` |
||
| team_map[team_id] = [dis] | ||
| else: | ||
| team_map[team_id].append(dis) | ||
|
|
||
| # average the distance | ||
| for list_key in team_map: | ||
| member_list = team_map[list_key] | ||
| team_map[list_key] = sum(member_list) / float(len(member_list)) | ||
|
|
||
| # match for skill | ||
| needed_skills = set() | ||
| frontend_languages = set(["html", "css", "javascript", "php", "typscript"]) | ||
| backend_languages = set(["java", "php", "ruby", "python", "c", "c++", "sql", "node.js"]) | ||
|
|
||
| # judge whether the user is frontend or backend, and suggest backend skills if they only know frontend, and vice versa | ||
| skill_set = set(skills) | ||
| front_num = len(skill_set.intersection(skills)) | ||
| back_num = len(skill_set.intersection(skills)) | ||
| front_num = len(skill_set.intersection(frontend_languages)) | ||
| back_num = len(skill_set.intersection(backend_languages)) | ||
|
|
||
| if front_num > (back_num * len(frontend_languages) / len(backend_languages)): | ||
| if back_num < 3: | ||
| needed_skills.update(backend_languages) | ||
| else: | ||
| if front_num < 3: | ||
| front_pers = front_num/(front_num+back_num) | ||
| back_pers = 1-front_pers | ||
| if front_pers > back_pers: | ||
| if front_pers < 0.3: | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This number seems arbitrary - is there a reason we picked this? Might be better to just do a direct greater than/less than comparison |
||
| needed_skills.update(frontend_languages) | ||
| if len(needed_skills): | ||
| needed_skills.update(backend_languages) | ||
| needed_skills.update(frontend_languages) | ||
|
|
||
| for skill in needed_skills: | ||
| # collection of all the team's skills | ||
| complementary_skills_match = coll("teams").aggregate( | ||
| [{"$match": {"complete": False, "skills": {"$all": [skill]}}}] | ||
| ) | ||
| # collections of all the team's interests | ||
| if not complementary_skills_match: | ||
| continue | ||
| for match in complementary_skills_match: | ||
| if match['_id'] not in names: | ||
| names.add(match['_id']) | ||
| matches.append(match) | ||
|
|
||
| # add base on interests | ||
| # AR/VR, BlockChain, Communications, CyberSecurity, DevOps, Fintech, Gaming, | ||
| # Healthcare, IoT, LifeHacks, ML/AI, Music, Productivity, Social Good, Voice Skills | ||
|
|
||
| # find teams with the listed interests; if there are too many matches, narrow down from the teams already in matches | ||
| if len(matches) > 50: | ||
| for match in matches: | ||
| if len(matches) <= 50: | ||
| break | ||
| team_interests = match["meta"]["interests"] | ||
| # team has no common skill | ||
| if len(list(set(interests).intersection(set(team_interests)))) == 0: | ||
| matches.remove(match) | ||
| names.remove(match["_id"]) | ||
| else: | ||
| needed_skills.update(skill_set) | ||
| else: | ||
| for interest in interests: | ||
| match = coll("teams").aggregate([{"$match": {"complete": False, "meta.interest": {"$all": [interest]}}}]) | ||
| if not match: | ||
| continue | ||
| for m in match: | ||
| if m["_id"] not in names: | ||
| names.add(m["_id"]) | ||
| matches.append(m) | ||
|
|
||
| # add suggestions base on prize | ||
| for prize in prizes: | ||
| match = coll("teams").aggregate([{"$match": {"complete": False, "prizes": {"$all": [prize]}}}]) | ||
| if not match: | ||
| continue | ||
| for m in match: | ||
| if m["_id"] not in names: | ||
| names.add(m["_id"]) | ||
| matches.append(m) | ||
|
|
||
| # if there are too many matches, reduce it base on seriousness | ||
| if len(matches) > 20: | ||
| for team in matches: | ||
| if (abs(team["seriousness"] - seriousness)) > 2: | ||
| matches.remove(team) | ||
| names.remove(team["_id"]) | ||
|
|
||
| # current_team = coll("teams").find_one({"_id": user["team_id"]}) | ||
| # try: | ||
| # matches.remove(current_team) | ||
| # except ValueError: | ||
| # pass | ||
|
|
||
| # inv_in = current_team["incoming_inv"] | ||
| # inv_out = current_team["outgoing_inv"] | ||
|
|
||
| # inv_sum = set() | ||
| # inv_sum.update(set(inv_in)) | ||
| # inv_sum.update(set(inv_out)) | ||
|
|
||
| # for i in inv_sum: | ||
| # try: | ||
| # matches.remove(i) | ||
| # except ValueError: | ||
| # pass | ||
| if front_pers > 0.3: | ||
| needed_skills.update(backend_languages) | ||
| else: | ||
| needed_skills.update(skill_set) | ||
|
|
||
| for team_id in team_map: | ||
| target_team = coll("teams").find_one({"_id": team_id}) | ||
| target_team_skills = target_team["skills"] | ||
| intersection_size = len(set(target_team_skills).intersection(needed_skills)) | ||
| team_map[team_id] *= (intersection_size * skills_weight) | ||
| team_seriousness = target_team["meta"]["seriousness"] | ||
| team_map[team_id] = team_map[team_id] * (intersection_size * skills_weight) * \ | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Having weights makes sense, but we might want to run a few test scenarios and see how much it actually affects things. For example, we would not want to recommend seriousness 1 to seriousness 5 - that should be a pretty significant difference, meaning more weight. |
||
| (abs(seriousness-team_seriousness) * seriousness_weight) | ||
| sorted_team_list = sorted(team_map.items(), key=lambda kv: (kv[1], kv[0])) | ||
|
|
||
| bad_match_ids = set() | ||
| bad_match_ids.add(user["team_id"]) | ||
| current_team = coll("teams").find_one({"_id": user["team_id"]}) | ||
| bad_match_ids.update(current_team["incoming_inv"]) | ||
| bad_match_ids.update(current_team["outgoing_inv"]) | ||
| good_matches = [] | ||
| for team in matches: | ||
| if team["_id"] not in bad_match_ids: | ||
| good_matches.append(team) | ||
| matches = good_matches | ||
|
|
||
| if not matches: | ||
| for team_id in sorted_team_list[:, 0]: | ||
| if team_id not in bad_match_ids: | ||
| good_matches.append(team_id) | ||
|
|
||
| if not good_matches: | ||
| return {"message": "No recommendations found"}, 404 | ||
|
|
||
| for team in matches: | ||
| for team in good_matches: | ||
| del team["meta"] | ||
|
|
||
| return {"matches": [format_team_object(team) for team in matches]}, 200 | ||
| return {"matches": [format_team_object(team) for team in good_matches]}, 200 | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,14 @@ | ||
| from src.flaskapp.db import coll | ||
|
|
||
|
|
||
def get_partial_profile(email):
    """Look up a user by email and return the document keyed by "user_id".

    The user is fetched from the "users" collection with ``_id`` equal to
    ``email``. When found, the document's "_id" entry is moved to a
    "user_id" entry on that same document before it is returned.

    NOTE(review): a reviewer asked where this helper is used; no caller
    is visible in this part of the change.

    Args:
        email: Value matched against the "_id" field of the user document.

    Returns:
        ``(user_document, 200)`` when the user exists, otherwise
        ``({"message": "User not found"}, 404)``.
    """
    profile = coll("users").find_one({"_id": email})
    if profile:
        # Expose the identifier under "user_id" instead of "_id".
        profile["user_id"] = profile.pop("_id")
        return profile, 200
    return {"message": "User not found"}, 404
|
|
||
|
|
||
| def get_user_profile(email): # GET | ||
| """Get user profile | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.