# Walkthrough: converting a string column to a pandas categorical, renaming
# and extending its categories, then sorting and grouping by it.
# Expected output is shown in comments; the exact repr formatting can vary
# slightly between pandas versions.
df = pd.DataFrame(
    {
        "id": [1, 2, 3, 4, 5, 6],
        "raw_grade": ["a", "b", "b", "a", "a", "e"],
    }
)

# Convert the raw grades to a categorical data type.
df["grade"] = df["raw_grade"].astype("category")
print(df["grade"])
# 0    a
# 1    b
# 2    b
# 3    a
# 4    a
# 5    e
# Name: grade, dtype: category
# Categories (3, object): [a, b, e]

# The current categories can be inspected with: df["grade"].cat.categories

# Rename the categories to more meaningful names. rename_categories returns a
# new Series, so the result is assigned back to the column. (Assigning
# directly to Series.cat.categories used to work in place, but that setter was
# deprecated and is rejected by pandas 2.x.)
df["grade"] = df["grade"].cat.rename_categories(["very good", "good", "very bad"])

# Reorder the categories and add the missing ones at the same time;
# set_categories also returns a new Series.
df["grade"] = df["grade"].cat.set_categories(
    ["very bad", "bad", "medium", "good", "very good"]
)
print(df["grade"])
# 0    very good
# 1         good
# 2         good
# 3    very good
# 4    very good
# 5     very bad
# Name: grade, dtype: category

# Sorting is per order in the categories, not lexical order.
print(df.sort_values(by="grade"))
#    id raw_grade      grade
# 5   6         e   very bad
# 1   2         b       good
# 2   3         b       good
# 0   1         a  very good
# 3   4         a  very good
# 4   5         a  very good

# Grouping by a categorical column shows also empty categories.
# observed=False is passed explicitly so the unused categories ("bad",
# "medium") keep appearing regardless of the pandas version's default.
print(df.groupby("grade", observed=False).size())
# grade
# very bad     1
# bad          0
# medium       0
# good         2
# very good    3
# dtype: int64