From 8c1fe86f4aa45e8a1bb274527f21cf4d29dbd6a4 Mon Sep 17 00:00:00 2001
From: David Rotermund <54365609+davrot@users.noreply.github.com>
Date: Wed, 17 May 2023 21:56:01 +0200
Subject: [PATCH] Add files via upload

---
 bib/create_bib_html.py | 106 +++++++++++++++++++++++++++++++++++++++++
 bib/customizations.py  |   7 +++
 bib/make_dataframe.py  | 103 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 216 insertions(+)
 create mode 100644 bib/create_bib_html.py
 create mode 100644 bib/make_dataframe.py
diff --git a/bib/create_bib_html.py b/bib/create_bib_html.py
new file mode 100644
index 0000000..50cce84
--- /dev/null
+++ b/bib/create_bib_html.py
@@ -0,0 +1,106 @@
+from bib.customizations import customizations_tae
+from bib.load_bib_file import load_bib_file
+from bib.make_dataframe import make_dataframe
+
+import pandas as pd
+import json
+import html
+
+
+def filter_string(input):  # escape HTML specials; non-ASCII becomes &#NNN; refs
+    return html.escape(input).encode("ascii", "xmlcharrefreplace").decode("ascii")
+
+
+def format_entry(entry) -> str:
+    """Render one bibliography record as an HTML table row.
+
+    ``entry`` is indexed by "author", "year", "title", "doi" and "journal".
+    A non-empty DOI turns the title into a link: a DOI without an http(s)
+    scheme is resolved through https://doi.org/ (a bare DOI would otherwise
+    be a relative link) and the href value is HTML-escaped so characters
+    such as '"' or '<' cannot break the markup.
+    """
+    title = filter_string(entry["title"])
+    doi = str(entry["doi"]).strip()
+    if doi:
+        if not doi.lower().startswith(("http://", "https://")):
+            doi = "https://doi.org/" + doi
+        title = f'<a href="{filter_string(doi)}">{title}</a>'
+
+    # NOTE(review): the author string is inserted unescaped, as before;
+    # confirm whether the author data base may contain markup on purpose.
+    output = f'<tr><td>{entry["author"]} ({int(entry["year"])}) <b>{title}</b> '
+    output += filter_string(entry["journal"]) + "</td></tr>"
+
+    # Curly braces that survived from BibTeX are rendered as italics.
+    return output.replace("{", "<i>").replace("}", "</i>")
+
+
+def create_bib_html(user_string: str, type_string: str, filename_bib: str) -> str:
+    """Build the HTML publication list for one author and one entry type.
+
+    Loads ``filename_bib`` with ``customizations_tae`` and reads
+    "types_db.json" and "authors_db.json" from the working directory.
+    Output is one ``<h3>`` heading plus ``<table>`` per year, newest first.
+
+    Args:
+        user_string: Text that must occur in the author string; matched
+            literally, not as a regular expression. "" disables the filter.
+        type_string: Entry type to list; the first element of its
+            "types_db.json" record names further types to include.
+        filename_bib: Path of the BibTeX file.
+
+    Returns:
+        The HTML fragment, or "" when no entry matches.
+
+    Raises:
+        ValueError: If a "types_db.json" record does not have 3 elements.
+    """
+    bib_database = load_bib_file(filename_bib, customizations_tae)
+
+    # JSON is UTF-8 by specification; do not depend on the locale default.
+    with open("types_db.json", "r", encoding="utf-8") as file:
+        type_dict = json.load(file)
+    with open("authors_db.json", "r", encoding="utf-8") as file:
+        author_dict = json.load(file)
+
+    # Make a list of all the bib types we need
+    full_type_list: list = [type_string]
+    for t_id, t_record in type_dict.items():
+        if len(t_record) != 3:
+            raise ValueError(f"types_db.json: entry {t_id!r} needs 3 elements")
+        if type_string == t_id:
+            full_type_list.extend(t_record[0])
+
+    # Make pandas data base for only the selected bib type; the frames are
+    # collected first and concatenated once instead of once per entry.
+    frames = []
+    for i, bib_entry in enumerate(bib_database.entries):
+        df = make_dataframe(bib_entry, author_dict, full_type_list, i)
+        if df is not None:
+            frames.append(df)
+    if not frames:
+        return ""
+    pf_data_frames = pd.concat(frames)
+
+    # Filter and sort the pandas data base
+    if user_string:
+        is_match = pf_data_frames["author"].str.contains(user_string, regex=False)
+        pf_data_frames = pf_data_frames[is_match]
+    if len(pf_data_frames) == 0:
+        return ""
+    pf_data_frames = pf_data_frames.sort_values(
+        ["year", "author"], ascending=[False, True]
+    )
+
+    # Build html: a new heading and table start whenever the year changes
+    parts: list[str] = []
+    actual_year = None
+    for _, row in pf_data_frames.iterrows():
+        if actual_year != int(row["year"]):
+            if actual_year is not None:
+                parts.append("</table>\n")
+            actual_year = int(row["year"])
+            parts.append(f"<h3>{actual_year}</h3>\n<table>")
+        parts.append(format_entry(row))
+    return "".join(parts) + "</table>"
diff --git a/bib/customizations.py b/bib/customizations.py
index bdffb1f..3c10092 100644
--- a/bib/customizations.py
+++ b/bib/customizations.py
@@ -7,3 +7,10 @@ def customizations_tajd(record):
     record = bibtexparser.customization.journal(record)
     record = bibtexparser.customization.doi(record)
     return record
+
+
+def customizations_tae(record):
+    """Apply bibtexparser's type, author and editor customizations to *record*."""
+    customization = bibtexparser.customization
+    typed_and_authored = customization.author(customization.type(record))
+    return customization.editor(typed_and_authored)
diff --git a/bib/make_dataframe.py b/bib/make_dataframe.py
new file mode 100644
index 0000000..ed5b0b1
--- /dev/null
+++ b/bib/make_dataframe.py
@@ -0,0 +1,103 @@
+from bib.shorten_authorname import shorten_authorname
+import pandas as pd
+
+
+def combine_names(names):
+    """Join the given names into one string separated by " and ".
+
+    An empty sequence gives an empty string instead of raising IndexError.
+    """
+    # str.join replaces the repeated concatenation and copes with 0 or 1 names.
+    return " and ".join(names)
+
+
+def fix_author(name, db):
+    """Map an author name onto its canonical key in ``db``.
+
+    The name is shortened with ``shorten_authorname``; the first key of
+    ``db`` that equals it, or whose value holds an equal item, is returned.
+    Without a match the shortened name itself is returned.
+    """
+    short = shorten_authorname(name)
+    for canonical, aliases in db.items():
+        if canonical == short or any(alias == short for alias in aliases):
+            return canonical
+    return short
+
+
+def make_dataframe(
+    entry: dict, author_json: dict, full_type_list: list[str], index_number: int
+) -> "pd.DataFrame | None":
+    """Convert one parsed BibTeX entry into a one-row data frame.
+
+    Args:
+        entry: Parsed entry. "author" is iterated as a sequence of names and
+            "editor" as a sequence of mappings with a "name" key.
+        author_json: Author data base handed to ``fix_author``.
+        full_type_list: Accepted values of ``entry["ENTRYTYPE"]``.
+        index_number: Index label of the produced row.
+
+    Returns:
+        A frame with the string columns year, title, author, doi and journal,
+        or None if the entry type is not accepted or if a title, a year/date
+        or an author/editor is missing.
+    """
+    # Check if everything is there
+    if "ENTRYTYPE" not in entry or entry["ENTRYTYPE"] not in full_type_list:
+        return None
+
+    if "title" not in entry:
+        return None
+
+    if "year" not in entry and "date" not in entry:
+        return None
+
+    if "author" not in entry and "editor" not in entry:
+        return None
+
+    # Title (curly braces are dropped)
+    title = str(entry["title"]).strip().replace("{", "").replace("}", "")
+
+    # Year: an explicit "year" wins, otherwise the part of "date" before "-"
+    if "year" in entry:
+        year = str(entry["year"]).strip()
+    else:
+        year = str(entry["date"]).split("-")[0].strip()
+
+    # Authors, with the editors as a fallback. A new list is built so that
+    # the caller's entry["author"] is no longer overwritten in place.
+    if "author" in entry:
+        raw_names = entry["author"]
+    else:
+        raw_names = [editor["name"] for editor in entry["editor"]]
+    author_string = combine_names(
+        [fix_author(raw_name, author_json) for raw_name in raw_names]
+    )
+
+    # DOI
+    doi = str(entry["doi"]).strip() if "doi" in entry else ""
+
+    # Journal name: the first field present in this order is used. Before,
+    # "journal" was tested in a separate ``if`` and was silently overwritten
+    # by e.g. "booktitle", "note" or "publisher" when "journaltitle" was absent.
+    journal_fields = (
+        "journaltitle",
+        "journal",
+        "booktitle",
+        "note",
+        "school",
+        "publisher",
+    )
+    journal = next(
+        (str(entry[key]).strip() for key in journal_fields if key in entry), ""
+    )
+    journal = (
+        journal.replace("\\textbackslash", "\\")
+        .replace("Publication Title: ", "")
+        .replace("\\&", "&")
+    )
+
+    row = dict(
+        year=year, title=title, author=author_string, doi=doi, journal=journal
+    )
+    return pd.DataFrame(row, index=[index_number])