From 8c1fe86f4aa45e8a1bb274527f21cf4d29dbd6a4 Mon Sep 17 00:00:00 2001
From: David Rotermund <54365609+davrot@users.noreply.github.com>
Date: Wed, 17 May 2023 21:56:01 +0200
Subject: [PATCH] Add files via upload
---
bib/create_bib_html.py | 106 +++++++++++++++++++++++++++++++++++++++++
bib/customizations.py | 7 +++
bib/make_dataframe.py | 103 +++++++++++++++++++++++++++++++++++++++
3 files changed, 216 insertions(+)
create mode 100644 bib/create_bib_html.py
create mode 100644 bib/make_dataframe.py
diff --git a/bib/create_bib_html.py b/bib/create_bib_html.py
new file mode 100644
index 0000000..50cce84
--- /dev/null
+++ b/bib/create_bib_html.py
@@ -0,0 +1,106 @@
+from bib.customizations import customizations_tae
+from bib.load_bib_file import load_bib_file
+from bib.make_dataframe import make_dataframe
+
+import pandas as pd
+import json
+import html
+
+
+def filter_string(input):
+    """Return *input* as HTML-escaped text that contains only ASCII.
+
+    The text is HTML-escaped first; every character that cannot be encoded
+    as ASCII is then replaced by a numeric character reference.
+    """
+    escaped = html.escape(input)
+    ascii_bytes = escaped.encode("ascii", "xmlcharrefreplace")
+    return ascii_bytes.decode()
+
+
+def format_entry(entry) -> str:
+ """Render one bibliography record as a fragment of the HTML listing.
+
+ The fragment is built from the "author", "year", "title", "doi" and
+ "journal" values of ``entry``. Title and journal are passed through
+ filter_string(); the author text is inserted as-is. All curly braces are
+ removed from the finished string before it is returned.
+
+ NOTE(review): several string literals below look truncated in this copy
+ of the patch (one literal is split across two lines, and the branch for a
+ non-empty DOI never uses the DOI) -- presumably HTML markup was lost.
+ Compare with the original commit before relying on this code.
+ """
+ output: str = (
+ str("
")
+ # The author text is not passed through filter_string().
+ + entry["author"]
+ + str(" (")
+ # int() raises ValueError when the year is not a plain number.
+ + str(int(entry["year"]))
+ + str(") ")
+ )
+ # An empty "doi" value selects the first branch, anything else the second.
+ if len(entry["doi"]) == 0:
+ output += str("") + filter_string(entry["title"]) + str(" ")
+ else:
+ output += (
+ str('')
+ + filter_string(entry["title"])
+ + str(" ")
+ )
+ output += filter_string(entry["journal"]) + " |
"
+ # Remove every curly brace from the assembled fragment.
+ output = output.replace("{", "")
+ output = output.replace("}", "")
+
+ return output
+
+
+def create_bib_html(user_string: str, type_string: str, filename_bib: str) -> str:
+ """Build the HTML publication listing for one bib type and optional author.
+
+ Args:
+ user_string: Pattern matched against the "author" column; an empty
+ string disables the author filter.
+ type_string: Bib type to list. When it is a key of types_db.json, the
+ items of the first element of that key's value are listed as well.
+ filename_bib: Passed to load_bib_file() together with customizations_tae.
+
+ Returns:
+ The generated markup, or "" when no entry is left after filtering.
+
+ NOTE(review): the string literals in the "Build html" part look truncated
+ in this copy of the patch (literals split across lines, empty literals) --
+ presumably HTML markup was lost. Compare with the original commit.
+ """
+ bib_database = load_bib_file(filename_bib, customizations_tae)
+
+ # Both JSON files are opened by relative path, i.e. they are looked up in
+ # the current working directory of the process.
+ with open("types_db.json", "r") as file:
+ type_dict = json.load(file)
+
+ with open("authors_db.json", "r") as file:
+ author_dict = json.load(file)
+
+ # Make a list of all the bib types we need
+ full_type_list: list = []
+ full_type_list.append(type_string)
+
+ for t_id in type_dict.keys():
+ # NOTE(review): assert statements are skipped when Python runs with -O,
+ # so this length check is not a reliable validation of the JSON file.
+ assert len(type_dict[t_id]) == 3
+ if type_string == t_id:
+ for i in type_dict[t_id][0]:
+ full_type_list.append(i)
+
+ # Make pandas data base for only the selected bib type
+ # make_dataframe() returns None for entries that are skipped.
+ # NOTE(review): pd.concat inside the loop copies the accumulated frame on
+ # every iteration; collecting the frames and concatenating once is cheaper.
+ pf_data_frames = None
+ for i in range(0, len(bib_database.entries)):
+ df = make_dataframe(bib_database.entries[i], author_dict, full_type_list, i)
+
+ if (pf_data_frames is None) and (df is not None):
+ pf_data_frames = df
+ elif df is not None:
+ pf_data_frames = pd.concat((pf_data_frames, df))
+
+ if pf_data_frames is None:
+ return ""
+
+ # Debugging:
+ # pf_data_frames.to_excel("excel_1.xlsx")
+
+ # Filter and sort the pandas data base
+ # where() blanks every row whose author does not match and dropna() then
+ # removes those rows.
+ # NOTE(review): str.contains() treats user_string as a regular expression
+ # by default -- confirm that this is intended for user-supplied text.
+ if len(user_string) > 0:
+ pf_data_frames = pf_data_frames.where(
+ pf_data_frames["author"].str.contains(user_string)
+ ).dropna()
+
+ # Newest year first, authors ascending within a year.
+ # NOTE(review): make_dataframe() stores the year as a string, so this sort
+ # compares the years as text.
+ pf_data_frames = pf_data_frames.sort_values(
+ ["year", "author"], ascending=[False, True]
+ )
+
+ if len(pf_data_frames) == 0:
+ return ""
+
+ # Debugging:
+ # pf_data_frames.to_excel("excel_2.xlsx")
+
+ # Build html
+ # Start with the year of the first (newest) row.
+ output: str = ""
+ actual_year: int = int(pf_data_frames.iloc[0]["year"])
+ output += str("") + f"{actual_year}" + str("
\n")
+ output += str("")
+
+ for entry_id in range(0, len(pf_data_frames)):
+ # A change of year emits a new year heading before the entry.
+ if actual_year != int(pf_data_frames.iloc[entry_id]["year"]):
+ actual_year = int(pf_data_frames.iloc[entry_id]["year"])
+ output += str("
")
+ output += str("\n") + f"{actual_year}" + str("
\n")
+ output += str("")
+
+ output += format_entry(pf_data_frames.iloc[entry_id])
+ output += str("
")
+
+ return output
diff --git a/bib/customizations.py b/bib/customizations.py
index bdffb1f..3c10092 100644
--- a/bib/customizations.py
+++ b/bib/customizations.py
@@ -7,3 +7,10 @@ def customizations_tajd(record):
record = bibtexparser.customization.journal(record)
record = bibtexparser.customization.doi(record)
return record
+
+
+def customizations_tae(record):
+    """Apply the type, author and editor customizations to *record*.
+
+    The three bibtexparser customization functions are applied in that
+    order, each one receiving the result of the previous one, and the final
+    record is returned.
+    """
+    steps = (
+        bibtexparser.customization.type,
+        bibtexparser.customization.author,
+        bibtexparser.customization.editor,
+    )
+    for step in steps:
+        record = step(record)
+    return record
diff --git a/bib/make_dataframe.py b/bib/make_dataframe.py
new file mode 100644
index 0000000..ed5b0b1
--- /dev/null
+++ b/bib/make_dataframe.py
@@ -0,0 +1,103 @@
+from bib.shorten_authorname import shorten_authorname
+import pandas as pd
+
+
+def combine_names(names):
+    """Join author names into one string separated by " and ".
+
+    Args:
+        names: Sequence of name strings.
+
+    Returns:
+        The names joined with " and ". A single name is returned unchanged,
+        and an empty sequence yields "" instead of raising IndexError.
+    """
+    return " and ".join(names)
+
+
+def fix_author(name, db):
+    """Map an author name onto a key of *db* when one matches.
+
+    The name is first passed through shorten_authorname(). The result is
+    then compared, in the iteration order of *db*, with each key and with
+    every item of that key's value; the first key that matches either way
+    is returned. Without a match the shortened name itself is returned.
+    """
+    short_name = shorten_authorname(name)
+
+    for key, variants in db.items():
+        if key == short_name or any(variant == short_name for variant in variants):
+            return key
+
+    return short_name
+
+
+def make_dataframe(
+    entry: dict, author_json: dict, full_type_list: list[str], index_number: int
+):
+    """Convert one parsed bib entry into a single-row DataFrame.
+
+    Args:
+        entry: Field dictionary of one bib entry. The "author" value is
+            iterated as a collection of name strings and each "editor" item
+            is indexed with "name" (presumably the output of the bibtexparser
+            author/editor customizations -- confirm with the caller).
+        author_json: Author lookup table handed to fix_author().
+        full_type_list: Entry types that are accepted.
+        index_number: Index label of the produced row.
+
+    Returns:
+        A DataFrame with the columns year, title, author, doi and journal,
+        or None when the entry has an unwanted type, lacks a title, lacks
+        both year and date, or has no author/editor names.
+    """
+    # Check if everything is there
+    if "ENTRYTYPE" not in entry or entry["ENTRYTYPE"] not in full_type_list:
+        return None
+    if "title" not in entry:
+        return None
+    if "year" not in entry and "date" not in entry:
+        return None
+    if "author" not in entry and "editor" not in entry:
+        return None
+
+    # Title: trimmed and with all curly braces removed.
+    title = str(entry["title"]).strip().replace("{", "").replace("}", "")
+
+    # Year: taken from "year", else the part of "date" before the first "-".
+    if "year" in entry:
+        year = str(entry["year"]).strip()
+    else:
+        year = str(entry["date"]).split("-")[0].strip()
+
+    # Authors: fall back to the editors when there is no author field.
+    if "author" in entry:
+        raw_names = entry["author"]
+    else:
+        raw_names = [editor["name"] for editor in entry["editor"]]
+
+    # Build a new list so the caller's entry is not modified in place.
+    authors = [fix_author(raw_name, author_json) for raw_name in raw_names]
+    if not authors:
+        # An entry without any name cannot be listed; skip it like the other
+        # incomplete entries instead of failing while joining the names.
+        return None
+    author_string = combine_names(authors)
+
+    # DOI
+    doi = str(entry["doi"]).strip() if "doi" in entry else ""
+
+    # Journal name: the first field present wins. The original code tested
+    # "journal" with a separate `if`, so a later field such as "note" or
+    # "publisher" replaced an existing journal name.
+    journal = ""
+    journal_fields = (
+        "journal",
+        "journaltitle",
+        "booktitle",
+        "note",
+        "school",
+        "publisher",
+    )
+    for field in journal_fields:
+        if field in entry:
+            journal = str(entry[field]).strip()
+            break
+
+    journal = (
+        journal.replace("\\textbackslash", "\\")
+        .replace("Publication Title: ", "")
+        .replace("\\&", "&")
+    )
+
+    row = {
+        "year": year,
+        "title": title,
+        "author": author_string,
+        "doi": doi,
+        "journal": journal,
+    }
+
+    return pd.DataFrame(row, index=[index_number])