Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,5 @@ Werkzeug==0.16.1
wrapt==1.12.1
wsgi-request-logger==0.4.6
zappa==0.51.0

fuzzywuzzy~=0.18.0
11 changes: 5 additions & 6 deletions src/flaskapp/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ def index():
@app.route("/users", methods=["GET", "POST"])
@authenticate
def users(email):

if request.method == "GET":
# Filter response using query parameters
# Might need to add pagination (limit/offset) for this response
Expand Down Expand Up @@ -136,11 +135,11 @@ def teams(email):
data = request.get_json(silent=True)

if (
not data
or "name" not in data
or "desc" not in data
or not data["name"]
or not data["desc"]
not data
or "name" not in data
or "desc" not in data
or not data["name"]
or not data["desc"]
):
return {"message": "Required info not found"}, 400
team_name = format_string(data["name"])
Expand Down
166 changes: 73 additions & 93 deletions src/matching/team_recommendations.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,28 @@
from src.flaskapp.db import coll
from src.flaskapp.util import format_team_object
from fuzzywuzzy import fuzz


def parse_user_to_string(user):
    """Flatten a user's bio, interests and prizes into a single string.

    Args:
        user: user document with "interests" and "prizes" (iterables of
            strings) and "bio" (a string).

    Return:
        the bio followed by the sorted interests and prizes, joined with no
        separator; or an error message and a 403 status if user is falsy
    """
    if not user:
        return {"message": "Invalid user or user not exist"}, 403

    # Build a fresh sorted list instead of extending user["interests"] in
    # place: mutating the caller's document would append the prizes to the
    # interests again every time the same user is parsed.
    fields = sorted([*user["interests"], *user["prizes"]])

    # NOTE(review): fields are joined without a separator, so they fuse into
    # one token with the last word of the bio - confirm this is intended.
    return user["bio"] + "".join(fields)


def lv_distance(user, user2):
    """Score how similar two users' profile strings are.

    Both users are flattened with parse_user_to_string and the results are
    compared with fuzzywuzzy's token_set_ratio. That scorer splits each
    string into tokens, separates the tokens both strings share from the
    ones unique to each side, and returns the best ratio among those
    comparisons, so word order and repeated words do not change the score.

    Args:
        user: user document for the first user.
        user2: user document for the second user.

    Return:
        an integer from 0 to 100; higher means more similar
    """
    # NOTE(review): despite the name, this is a similarity score rather than
    # a Levenshtein distance - a larger value means a closer match.
    return fuzz.token_set_ratio(
        parse_user_to_string(user),
        parse_user_to_string(user2),
    )
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we add a comment here briefly explaining what `token_set_ratio` does?



def get_team_recommendations(email): # GET
Expand All @@ -13,127 +36,84 @@ def get_team_recommendations(email): # GET
Return:
a list of recommended teams to join
"""
skills_weight = 1.2
seriousness_weight = 1.1

user = coll("users").find_one({"_id": email})
if not user:
return {"message": "Invalid user"}, 403

# basic info about users
skills = user["skills"]
interests = user["interests"]
prizes = user["prizes"]
seriousness = user["seriousness"]

names = set()
matches = []
all_open_teams = coll("teams").aggregate([{"$match": {"complete": False}}])
all_open_members = coll("users").aggregate([{"$match": {"team_id": {"$all": [all_open_teams]}}}])

team_map = dict()

# map of distances
for member in all_open_members:
team_id = member["team_id"]
dis = lv_distance(user, member)
if team_id in team_map:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should be `if team_id not in team_map` - might want to double-check this.

team_map[team_id] = [dis]
else:
team_map[team_id].append(dis)

# average the distance
for list_key in team_map:
member_list = team_map[list_key]
team_map[list_key] = sum(member_list) / float(len(member_list))

# match for skill
needed_skills = set()
frontend_languages = set(["html", "css", "javascript", "php", "typscript"])
backend_languages = set(["java", "php", "ruby", "python", "c", "c++", "sql", "node.js"])

# judge whether the user is frontend or backend; suggest backend skills if they only know frontend, and vice versa
skill_set = set(skills)
front_num = len(skill_set.intersection(skills))
back_num = len(skill_set.intersection(skills))
front_num = len(skill_set.intersection(frontend_languages))
back_num = len(skill_set.intersection(backend_languages))

if front_num > (back_num * len(frontend_languages) / len(backend_languages)):
if back_num < 3:
needed_skills.update(backend_languages)
else:
if front_num < 3:
front_pers = front_num/(front_num+back_num)
back_pers = 1-front_pers
if front_pers > back_pers:
if front_pers < 0.3:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This number seems arbitrary - is there a reason we picked this? Might be better to just do a direct greater than/less than comparison

needed_skills.update(frontend_languages)
if len(needed_skills):
needed_skills.update(backend_languages)
needed_skills.update(frontend_languages)

for skill in needed_skills:
# collection of all the team's skills
complementary_skills_match = coll("teams").aggregate(
[{"$match": {"complete": False, "skills": {"$all": [skill]}}}]
)
# collections of all the team's interests
if not complementary_skills_match:
continue
for match in complementary_skills_match:
if match['_id'] not in names:
names.add(match['_id'])
matches.append(match)

# add base on interests
# AR/VR, BlockChain, Communications, CyberSecurity, DevOps, Fintech, Gaming,
# Healthcare, IoT, LifeHacks, ML/AI, Music, Productivity, Social Good, Voice Skills

# finding team with listed interests, if too much matches, find from teams in the matches
if len(matches) > 50:
for match in matches:
if len(matches) <= 50:
break
team_interests = match["meta"]["interests"]
# team has no common skill
if len(list(set(interests).intersection(set(team_interests)))) == 0:
matches.remove(match)
names.remove(match["_id"])
else:
needed_skills.update(skill_set)
else:
for interest in interests:
match = coll("teams").aggregate([{"$match": {"complete": False, "meta.interest": {"$all": [interest]}}}])
if not match:
continue
for m in match:
if m["_id"] not in names:
names.add(m["_id"])
matches.append(m)

# add suggestions base on prize
for prize in prizes:
match = coll("teams").aggregate([{"$match": {"complete": False, "prizes": {"$all": [prize]}}}])
if not match:
continue
for m in match:
if m["_id"] not in names:
names.add(m["_id"])
matches.append(m)

# if there are too many matches, reduce it base on seriousness
if len(matches) > 20:
for team in matches:
if (abs(team["seriousness"] - seriousness)) > 2:
matches.remove(team)
names.remove(team["_id"])

# current_team = coll("teams").find_one({"_id": user["team_id"]})
# try:
# matches.remove(current_team)
# except ValueError:
# pass

# inv_in = current_team["incoming_inv"]
# inv_out = current_team["outgoing_inv"]

# inv_sum = set()
# inv_sum.update(set(inv_in))
# inv_sum.update(set(inv_out))

# for i in inv_sum:
# try:
# matches.remove(i)
# except ValueError:
# pass
if front_pers > 0.3:
needed_skills.update(backend_languages)
else:
needed_skills.update(skill_set)

for team_id in team_map:
target_team = coll("teams").find_one({"_id": team_id})
target_team_skills = target_team["skills"]
intersection_size = len(set(target_team_skills).intersection(needed_skills))
team_map[team_id] *= (intersection_size * skills_weight)
team_seriousness = target_team["meta"]["seriousness"]
team_map[team_id] = team_map[team_id] * (intersection_size * skills_weight) * \
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Having weights makes sense, but we might want to run a few test scenarios and see how much it actually affects things. For example, we would not want to recommend seriousness 1 to seriousness 5 - that should be a pretty significant difference, meaning more weight.

(abs(seriousness-team_seriousness) * seriousness_weight)
sorted_team_list = sorted(team_map.items(), key=lambda kv: (kv[1], kv[0]))

bad_match_ids = set()
bad_match_ids.add(user["team_id"])
current_team = coll("teams").find_one({"_id": user["team_id"]})
bad_match_ids.update(current_team["incoming_inv"])
bad_match_ids.update(current_team["outgoing_inv"])
good_matches = []
for team in matches:
if team["_id"] not in bad_match_ids:
good_matches.append(team)
matches = good_matches

if not matches:
for team_id in sorted_team_list[:, 0]:
if team_id not in bad_match_ids:
good_matches.append(team_id)

if not good_matches:
return {"message": "No recommendations found"}, 404

for team in matches:
for team in good_matches:
del team["meta"]

return {"matches": [format_team_object(team) for team in matches]}, 200
return {"matches": [format_team_object(team) for team in good_matches]}, 200
8 changes: 8 additions & 0 deletions src/users/user_profile.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
from src.flaskapp.db import coll


def get_partial_profile(email):
    """Look up a user by email and return the document keyed by user_id.

    Args:
        email: the user's email, used as the "_id" of the users collection.

    Return:
        the user document with "_id" renamed to "user_id" and a 200 status,
        or an error message and a 404 status if no user is found
    """
    # NOTE(review): no caller is visible in this file - confirm where this is
    # used. Despite the name, no fields are filtered out of the document.
    record = coll("users").find_one({"_id": email})
    if record:
        record["user_id"] = record.pop("_id")
        return record, 200
    return {"message": "User not found"}, 404


def get_user_profile(email): # GET
"""Get user profile
Expand Down