From 8c1fe86f4aa45e8a1bb274527f21cf4d29dbd6a4 Mon Sep 17 00:00:00 2001
From: David Rotermund <54365609+davrot@users.noreply.github.com>
Date: Wed, 17 May 2023 21:56:01 +0200
Subject: [PATCH] Add files via upload

---
Review note: the line structure of this patch was restored and the added
code was revised; hunk headers and the diffstat match the new payload,
but the blob ids on the "index" lines were not recomputed.

 bib/create_bib_html.py | 120 +++++++++++++++++++++++++++++++++++++++++
 bib/customizations.py  |  11 ++++
 bib/make_dataframe.py  |  94 ++++++++++++++++++++++++++++++++
 3 files changed, 225 insertions(+)
 create mode 100644 bib/create_bib_html.py
 create mode 100644 bib/make_dataframe.py

diff --git a/bib/create_bib_html.py b/bib/create_bib_html.py
new file mode 100644
index 0000000..50cce84
--- /dev/null
+++ b/bib/create_bib_html.py
@@ -0,0 +1,120 @@
+from bib.customizations import customizations_tae
+from bib.load_bib_file import load_bib_file
+from bib.make_dataframe import make_dataframe
+
+import pandas as pd
+import json
+import html
+
+
+def filter_string(input):
+    """Return ``input`` HTML-escaped and restricted to ASCII.
+
+    Non-ASCII characters become numeric character references, so the
+    result does not depend on the encoding of the page it is embedded in.
+    """
+    return html.escape(input).encode("ascii", "xmlcharrefreplace").decode()
+
+
+def format_entry(entry) -> str:
+    """Format one publication as an HTML list item.
+
+    ``entry`` must provide the fields ``author``, ``year``, ``title``,
+    ``doi`` and ``journal``. The title becomes a https://doi.org/ link
+    when ``doi`` is not empty.
+    """
+    # NOTE(review): the markup literals of this function were missing from
+    # the reviewed copy of the patch (apparently removed by an HTML
+    # filter). The tags below are a reconstruction -- confirm upstream.
+    title = filter_string(entry["title"])
+    if entry["doi"]:
+        doi = html.escape(entry["doi"])
+        title = f'<a href="https://doi.org/{doi}">{title}</a>'
+
+    # The author is escaped like title and journal; it used to be inserted
+    # verbatim, so "&" or "<" in a name produced invalid markup.
+    output = (
+        f"<li>{filter_string(entry['author'])} ({int(entry['year'])}) "
+        f"{title} {filter_string(entry['journal'])}</li>"
+    )
+    # BibTeX grouping braces have no meaning in HTML.
+    return output.replace("{", "").replace("}", "")
+
+
+def create_bib_html(user_string: str, type_string: str, filename_bib: str) -> str:
+    """Build the HTML publication list for one author and entry type.
+
+    Args:
+        user_string: Text the author column must contain; an empty string
+            keeps every entry. It is matched by ``Series.str.contains``,
+            i.e. interpreted as a regular expression.
+        type_string: Entry type to show. If it is a key of
+            ``types_db.json``, the types listed in the first field of
+            that record are shown as well.
+        filename_bib: Path of the BibTeX file to load.
+
+    Returns:
+        One heading and list per year, newest year first, or an empty
+        string when no entry matches.
+
+    Raises:
+        ValueError: If a record in ``types_db.json`` does not have exactly
+            three fields.
+    """
+    bib_database = load_bib_file(filename_bib, customizations_tae)
+
+    with open("types_db.json", "r") as file:
+        type_dict = json.load(file)
+
+    with open("authors_db.json", "r") as file:
+        author_dict = json.load(file)
+
+    # Validate with an exception: ``assert`` is stripped under ``python -O``.
+    for t_id, record in type_dict.items():
+        if len(record) != 3:
+            raise ValueError(f"types_db.json: {t_id!r} must have 3 fields")
+
+    # The requested type plus the types grouped under it.
+    full_type_list: list = [type_string]
+    if type_string in type_dict:
+        full_type_list.extend(type_dict[type_string][0])
+
+    # Collect the single-row frames and concatenate once; concatenating
+    # inside the loop copies all earlier rows on every iteration.
+    frames = []
+    for index, entry in enumerate(bib_database.entries):
+        df = make_dataframe(entry, author_dict, full_type_list, index)
+        if df is not None:
+            frames.append(df)
+
+    if not frames:
+        return ""
+
+    pf_data_frames = pd.concat(frames)
+
+    if user_string:
+        matches = pf_data_frames["author"].str.contains(user_string)
+        pf_data_frames = pf_data_frames[matches]
+
+    if pf_data_frames.empty:
+        return ""
+
+    pf_data_frames = pf_data_frames.sort_values(
+        ["year", "author"], ascending=[False, True]
+    )
+
+    # NOTE(review): heading and list tags are reconstructed, see the note
+    # in format_entry. The list of the last year is closed explicitly.
+    parts: list[str] = []
+    actual_year = None
+    for _, row in pf_data_frames.iterrows():
+        year = int(row["year"])
+        if year != actual_year:
+            if actual_year is not None:
+                parts.append("</ul>\n")
+            parts.append(f"<h3>{year}</h3>\n<ul>\n")
+            actual_year = year
+        parts.append(format_entry(row) + "\n")
+    parts.append("</ul>\n")
+
+    return "".join(parts)
diff --git a/bib/customizations.py b/bib/customizations.py
index bdffb1f..3c10092 100644
--- a/bib/customizations.py
+++ b/bib/customizations.py
@@ -7,3 +7,14 @@ def customizations_tajd(record):
     record = bibtexparser.customization.journal(record)
     record = bibtexparser.customization.doi(record)
     return record
+
+
+def customizations_tae(record):
+    """Apply bibtexparser's ``type``, ``author`` and ``editor`` customizations.
+
+    ``create_bib_html`` passes this function to ``load_bib_file``.
+    """
+    record = bibtexparser.customization.type(record)
+    record = bibtexparser.customization.author(record)
+    record = bibtexparser.customization.editor(record)
+    return record
diff --git a/bib/make_dataframe.py b/bib/make_dataframe.py
new file mode 100644
index 0000000..ed5b0b1
--- /dev/null
+++ b/bib/make_dataframe.py
@@ -0,0 +1,94 @@
+from bib.shorten_authorname import shorten_authorname
+import pandas as pd
+
+# Venue fields in order of preference.
+_VENUE_FIELDS = (
+    "journaltitle",
+    "journal",
+    "booktitle",
+    "note",
+    "school",
+    "publisher",
+)
+
+
+def combine_names(names):
+    """Join ``names`` with " and "; an empty sequence gives ""."""
+    return " and ".join(names)
+
+
+def fix_author(name, db):
+    """Map ``name`` to its canonical spelling.
+
+    ``name`` is first passed through ``shorten_authorname``. ``db`` maps a
+    canonical name to its aliases; the key is returned when the shortened
+    name equals the key or one of its aliases, otherwise the shortened
+    name itself is returned.
+    """
+    name = shorten_authorname(name)
+
+    for canonical, aliases in db.items():
+        if name == canonical or any(alias == name for alias in aliases):
+            return canonical
+
+    return name
+
+
+def make_dataframe(
+    entry: dict, author_json: dict, full_type_list: list[str], index_number: int
+) -> pd.DataFrame | None:
+    """Convert one parsed BibTeX entry into a single-row DataFrame.
+
+    Args:
+        entry: Entry dictionary; ``author`` must be a list of names and
+            ``editor`` a list of dicts with a ``name`` key.
+        author_json: Alias table handed to ``fix_author``.
+        full_type_list: Accepted values of ``entry["ENTRYTYPE"]``.
+        index_number: Index label of the returned row.
+
+    Returns:
+        A frame with the columns ``year``, ``title``, ``author``, ``doi``
+        and ``journal`` (all strings), or ``None`` when the entry has an
+        unwanted type or lacks a title, a year/date or an author/editor.
+    """
+    if "ENTRYTYPE" not in entry or entry["ENTRYTYPE"] not in full_type_list:
+        return None
+    if "title" not in entry:
+        return None
+    if "year" not in entry and "date" not in entry:
+        return None
+    if "author" not in entry and "editor" not in entry:
+        return None
+
+    title = str(entry["title"]).strip().replace("{", "").replace("}", "")
+
+    if "year" in entry:
+        year = str(entry["year"]).strip()
+    else:
+        # Keep the text before the first "-" of the date.
+        year = str(entry["date"]).split("-")[0].strip()
+
+    if "author" in entry:
+        names = entry["author"]
+    else:
+        names = [editor["name"] for editor in entry["editor"]]
+    # Build a new list: rewriting ``entry["author"]`` in place would change
+    # the caller's record.
+    author_string = combine_names([fix_author(name, author_json) for name in names])
+
+    doi = str(entry["doi"]).strip() if "doi" in entry else ""
+
+    # Use the first venue field that is present. The former if/elif chain
+    # let booktitle, note, school or publisher overwrite ``journal``.
+    journal = next(
+        (str(entry[field]).strip() for field in _VENUE_FIELDS if field in entry),
+        "",
+    )
+    journal = (
+        journal.replace("\\textbackslash", "\\")
+        .replace("Publication Title: ", "")
+        .replace("\\&", "&")
+    )
+
+    row = dict(year=year, title=title, author=author_string, doi=doi, journal=journal)
+    return pd.DataFrame(row, index=[index_number])