Add files via upload

commit 8c1fe86f4a (parent 1df9b1ba0b)
3 changed files with 216 additions and 0 deletions
bib/create_bib_html.py (new file, 106 lines)

@@ -0,0 +1,106 @@
from bib.customizations import customizations_tae
from bib.load_bib_file import load_bib_file
from bib.make_dataframe import make_dataframe

import pandas as pd
import json
import html


def filter_string(input):
    return str(html.escape(input).encode("ascii", "xmlcharrefreplace").decode())


def format_entry(entry) -> str:
    output: str = (
        str("<tr><td>")
        + entry["author"]
        + str(" (")
        + str(int(entry["year"]))
        + str(") ")
    )
    if len(entry["doi"]) == 0:
        output += str("<b>") + filter_string(entry["title"]) + str("</b> ")
    else:
        output += (
            str('<b><a href="')
            + entry["doi"]
            + str('">')
            + filter_string(entry["title"])
            + str("</a></b> ")
        )
    output += filter_string(entry["journal"]) + "</td></tr>"
    output = output.replace("{", "<i>")
    output = output.replace("}", "</i>")

    return output


def create_bib_html(user_string: str, type_string: str, filename_bib: str) -> str:
    bib_database = load_bib_file(filename_bib, customizations_tae)

    with open("types_db.json", "r") as file:
        type_dict = json.load(file)

    with open("authors_db.json", "r") as file:
        author_dict = json.load(file)

    # Make a list of all the bib types we need
    full_type_list: list = []
    full_type_list.append(type_string)

    for t_id in type_dict.keys():
        assert len(type_dict[t_id]) == 3
        if type_string == t_id:
            for i in type_dict[t_id][0]:
                full_type_list.append(i)

    # Make pandas data base for only the selected bib type
    pf_data_frames = None
    for i in range(0, len(bib_database.entries)):
        df = make_dataframe(bib_database.entries[i], author_dict, full_type_list, i)

        if (pf_data_frames is None) and (df is not None):
            pf_data_frames = df
        elif df is not None:
            pf_data_frames = pd.concat((pf_data_frames, df))

    if pf_data_frames is None:
        return ""

    # Debugging:
    # pf_data_frames.to_excel("excel_1.xlsx")

    # Filter and sort the pandas data base
    if len(user_string) > 0:
        pf_data_frames = pf_data_frames.where(
            pf_data_frames["author"].str.contains(user_string)
        ).dropna()

    pf_data_frames = pf_data_frames.sort_values(
        ["year", "author"], ascending=[False, True]
    )

    if len(pf_data_frames) == 0:
        return ""

    # Debugging:
    # pf_data_frames.to_excel("excel_2.xlsx")

    # Build html
    output: str = ""
    actual_year: int = int(pf_data_frames.iloc[0]["year"])
    output += str("<h3>") + f"{actual_year}" + str("</h3>\n")
    output += str("<table>")

    for entry_id in range(0, len(pf_data_frames)):
        if actual_year != int(pf_data_frames.iloc[entry_id]["year"]):
            actual_year = int(pf_data_frames.iloc[entry_id]["year"])
            output += str("</table>")
            output += str("\n<h3>") + f"{actual_year}" + str("</h3>\n")
            output += str("<table>")

        output += format_entry(pf_data_frames.iloc[entry_id])
    output += str("</table>")

    return output
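For orientation, a hedged usage sketch of the new create_bib_html function. The file names ("library.bib", "publications.html"), the "article" type key, and the "Miller" author filter are placeholders and not part of the commit; the call only assumes that types_db.json and authors_db.json are present in the working directory, since the function reads both.

# Hypothetical usage sketch; all paths and values below are placeholders.
from bib.create_bib_html import create_bib_html

html_fragment = create_bib_html(
    user_string="Miller",        # keep only entries whose author string contains this
    type_string="article",       # entry type to select; types_db.json may expand it
    filename_bib="library.bib",  # placeholder path to the BibTeX database
)

with open("publications.html", "w") as output_file:
    output_file.write(html_fragment)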
bib/customizations.py (7 additions)

@@ -7,3 +7,10 @@ def customizations_tajd(record):
    record = bibtexparser.customization.journal(record)
    record = bibtexparser.customization.doi(record)
    return record


def customizations_tae(record):
    record = bibtexparser.customization.type(record)
    record = bibtexparser.customization.author(record)
    record = bibtexparser.customization.editor(record)
    return record
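bib/load_bib_file.py is referenced above but not included in this commit. As rough orientation only, here is a minimal sketch of what such a loader could look like with bibtexparser 1.x, wiring in a customization callback such as customizations_tae; the actual implementation in the repository may differ.

# Sketch only; the real bib/load_bib_file.py is not shown in this commit.
import bibtexparser
from bibtexparser.bparser import BibTexParser


def load_bib_file(filename_bib, customization):
    # Apply the per-record customization (e.g. customizations_tae) while parsing.
    parser = BibTexParser(common_strings=True)
    parser.customization = customization
    with open(filename_bib, "r") as bib_file:
        return bibtexparser.load(bib_file, parser=parser)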
bib/make_dataframe.py (new file, 103 lines)

@@ -0,0 +1,103 @@
from bib.shorten_authorname import shorten_authorname
import pandas as pd


def combine_names(names):
    name = names[0]
    if len(names) > 1:
        for i in names[1:]:
            name += str(" and ") + i

    return name


def fix_author(name, db):
    name = shorten_authorname(name)

    for idx in db.keys():
        if idx == name:
            return idx

        for id in db[idx]:
            if id == name:
                return idx

    return name


def make_dataframe(
    entry: dict, author_json: dict, full_type_list: list[str], index_number: int
):
    # Check if everything is there
    if "ENTRYTYPE" not in entry.keys():
        return None

    if entry["ENTRYTYPE"] not in full_type_list:
        return None

    if "title" not in entry.keys():
        return None

    if "year" not in entry.keys() and "date" not in entry.keys():
        return None

    if "author" not in entry.keys() and "editor" not in entry.keys():
        return None

    # Title
    title = str(entry["title"]).lstrip().rstrip()

    # Year
    if "year" in entry.keys():
        year = str(entry["year"]).lstrip().rstrip()
    else:
        year = str(entry["date"]).split("-")[0].lstrip().rstrip()

    # Authors
    if "author" in entry.keys():
        author = entry["author"]
    else:
        author = []
        for e_id in entry["editor"]:
            author.append(e_id["name"])

    for i in range(0, len(author)):
        author[i] = fix_author(author[i], author_json)
    author_string = combine_names(author)

    # DOI
    doi: str = ""
    if "doi" in entry.keys():
        doi = str(entry["doi"]).lstrip().rstrip()

    # Journal name
    journal: str = ""
    if "journal" in entry.keys():
        journal = str(entry["journal"]).lstrip().rstrip()
    if "journaltitle" in entry.keys():
        journal = str(entry["journaltitle"]).lstrip().rstrip()
    elif "booktitle" in entry.keys():
        journal = str(entry["booktitle"]).lstrip().rstrip()
    elif "note" in entry.keys():
        journal = str(entry["note"]).lstrip().rstrip()
    elif "school" in entry.keys():
        journal = str(entry["school"]).lstrip().rstrip()
    elif "publisher" in entry.keys():
        journal = str(entry["publisher"]).lstrip().rstrip()

    title = title.replace("{", "").replace("}", "")
    journal = (
        journal.replace("\\textbackslash", "\\")
        .replace("Publication Title: ", "")
        .replace("\\&", "&")
    )

    dataframe: None | dict = dict(
        year=year,
        title=title,
        author=author_string,
        doi=doi,
        journal=journal,
    )

    return pd.DataFrame(dataframe, index=[index_number])
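To illustrate the input shape make_dataframe expects, a hypothetical call with an invented entry and author database. The values are not taken from the repository, and running it assumes bib.shorten_authorname (used by fix_author but not part of this commit) is importable.

# Hypothetical input; field names follow what make_dataframe reads above.
from bib.make_dataframe import make_dataframe

entry = {
    "ENTRYTYPE": "article",
    "title": "An {example} title",
    "year": "2023",
    "author": ["Doe, Jane", "Smith, John"],  # list form, as customizations_tae produces
    "doi": "https://doi.org/10.0000/example",
    "journal": "Journal of Examples",
}
author_db = {"Doe, J.": ["Doe, Jane"]}  # assumed shape of authors_db.json

df = make_dataframe(entry, author_db, ["article"], 0)
print(df)  # one-row DataFrame with columns: year, title, author, doi, journal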