Add files via upload
This commit is contained in:
parent
1df9b1ba0b
commit
8c1fe86f4a
3 changed files with 216 additions and 0 deletions
106
bib/create_bib_html.py
Normal file
106
bib/create_bib_html.py
Normal file
|
@ -0,0 +1,106 @@
|
|||
from bib.customizations import customizations_tae
|
||||
from bib.load_bib_file import load_bib_file
|
||||
from bib.make_dataframe import make_dataframe
|
||||
|
||||
import pandas as pd
|
||||
import json
|
||||
import html
|
||||
|
||||
|
||||
def filter_string(input):
    """Return *input* as HTML-escaped, pure-ASCII text.

    The text is first run through ``html.escape`` (which also escapes quote
    characters). Every character that does not fit into ASCII is then
    replaced by a numeric character reference such as ``&#252;``.
    """
    escaped = html.escape(input)
    # "xmlcharrefreplace" swaps non-ASCII characters for &#NNN; references
    # instead of raising UnicodeEncodeError.
    ascii_bytes = escaped.encode("ascii", errors="xmlcharrefreplace")
    return ascii_bytes.decode()
|
||||
|
||||
|
||||
def format_entry(entry) -> str:
    """Render one bibliography entry as a single HTML table row.

    Args:
        entry: Mapping-like record (``create_bib_html`` passes a pandas row)
            providing the keys ``author``, ``year``, ``title``, ``doi`` and
            ``journal``. ``year`` must be convertible with ``int``.

    Returns:
        ``<tr><td>author (year) <b>title</b> journal</td></tr>``. When
        ``doi`` is non-empty the title is wrapped in a link whose ``href``
        is the ``doi`` value.

    Raises:
        ValueError: If ``entry["year"]`` cannot be converted to ``int``.
    """
    # Every text field goes through filter_string so that characters such as
    # "<", "&" or '"' cannot break the markup. Previously the author and the
    # DOI were inserted verbatim while title and journal were escaped.
    author = filter_string(entry["author"])
    year = int(entry["year"])
    title = filter_string(entry["title"])
    journal = filter_string(entry["journal"])
    doi = entry["doi"]

    if len(doi) == 0:
        title_html = f"<b>{title}</b> "
    else:
        # DOIs may legitimately contain "<", ">" or "&"; escape them so the
        # href attribute stays well-formed.
        href = filter_string(doi)
        title_html = f'<b><a href="{href}">{title}</a></b> '

    output = f"<tr><td>{author} ({year}) {title_html}{journal}</td></tr>"

    # Braces remaining in the text are rendered as italics.
    output = output.replace("{", "<i>")
    output = output.replace("}", "</i>")

    return output
|
||||
|
||||
|
||||
def create_bib_html(user_string: str, type_string: str, filename_bib: str) -> str:
    """Build an HTML publication list, grouped by year, from a bib file.

    The files ``types_db.json`` and ``authors_db.json`` are opened relative
    to the current working directory.

    Args:
        user_string: Pattern that the combined author string of an entry must
            contain. An empty string disables the author filter.
        type_string: Entry type to list. If it is a key of ``types_db.json``,
            the types in the first element of that record are listed too.
        filename_bib: Path handed to ``load_bib_file``.

    Returns:
        One ``<h3>year</h3>`` heading followed by a ``<table>`` per year,
        newest year first, or ``""`` when no entry matches.

    Raises:
        ValueError: If a record in ``types_db.json`` does not have exactly
            three elements.
    """
    bib_database = load_bib_file(filename_bib, customizations_tae)

    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open("types_db.json", "r", encoding="utf-8") as file:
        type_dict = json.load(file)

    with open("authors_db.json", "r", encoding="utf-8") as file:
        author_dict = json.load(file)

    # Validate explicitly: an ``assert`` would vanish under ``python -O``.
    for t_id, t_record in type_dict.items():
        if len(t_record) != 3:
            raise ValueError(
                f"types_db.json: record {t_id!r} must have exactly 3 elements"
            )

    # Make a list of all the bib types we need
    full_type_list: list = [type_string]
    if type_string in type_dict:
        full_type_list.extend(type_dict[type_string][0])

    # Make pandas data base for only the selected bib type.
    # Collect the one-row frames and concatenate once instead of growing a
    # frame inside the loop.
    frames = []
    for index, bib_entry in enumerate(bib_database.entries):
        df = make_dataframe(bib_entry, author_dict, full_type_list, index)
        if df is not None:
            frames.append(df)

    if not frames:
        return ""

    pf_data_frames = pd.concat(frames)

    # Debugging:
    # pf_data_frames.to_excel("excel_1.xlsx")

    # Filter and sort the pandas data base
    if len(user_string) > 0:
        # NOTE(review): str.contains treats user_string as a regular
        # expression by default; pass regex=False if a literal substring
        # match is what callers expect -- confirm against callers.
        matches = pf_data_frames["author"].str.contains(user_string)
        pf_data_frames = pf_data_frames[matches]

    pf_data_frames = pf_data_frames.sort_values(
        ["year", "author"], ascending=[False, True]
    )

    if len(pf_data_frames) == 0:
        return ""

    # Debugging:
    # pf_data_frames.to_excel("excel_2.xlsx")

    # Build html: a new heading and table start whenever the year changes.
    parts: list = []
    actual_year = None
    for _, entry in pf_data_frames.iterrows():
        entry_year = int(entry["year"])
        if entry_year != actual_year:
            if parts:
                # Close the table of the previous year.
                parts.append("</table>\n")
            parts.append(f"<h3>{entry_year}</h3>\n<table>")
            actual_year = entry_year

        parts.append(format_entry(entry))

    parts.append("</table>")

    return "".join(parts)
|
|
@ -7,3 +7,10 @@ def customizations_tajd(record):
|
|||
record = bibtexparser.customization.journal(record)
|
||||
record = bibtexparser.customization.doi(record)
|
||||
return record
|
||||
|
||||
|
||||
def customizations_tae(record):
    """Run the bibtexparser ``type``, ``author`` and ``editor`` customizations.

    The three customizations are applied to *record* in that order and the
    resulting record is returned.
    """
    steps = (
        bibtexparser.customization.type,
        bibtexparser.customization.author,
        bibtexparser.customization.editor,
    )
    for step in steps:
        record = step(record)
    return record
|
||||
|
|
103
bib/make_dataframe.py
Normal file
103
bib/make_dataframe.py
Normal file
|
@ -0,0 +1,103 @@
|
|||
from bib.shorten_authorname import shorten_authorname
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def combine_names(names):
    """Join author names into one string separated by ``" and "``.

    Args:
        names: Sequence of name strings.

    Returns:
        The names joined with ``" and "``. A single name is returned
        unchanged, and an empty sequence yields ``""`` (it used to raise
        ``IndexError``).
    """
    return " and ".join(names)
|
||||
|
||||
|
||||
def fix_author(name, db):
    """Map an author name onto its key in *db*, if it has one.

    The name is first passed through ``shorten_authorname``. The entries of
    *db* are then scanned in order; the first key that either equals the
    shortened name or whose value contains an element equal to it is
    returned. When nothing matches, the shortened name itself is returned.
    """
    short_name = shorten_authorname(name)

    for canonical, aliases in db.items():
        if canonical == short_name:
            return canonical
        if any(alias == short_name for alias in aliases):
            return canonical

    return short_name
|
||||
|
||||
|
||||
def make_dataframe(
    entry: dict, author_json: dict, full_type_list: list[str], index_number: int
) -> "pd.DataFrame | None":
    """Convert one bib entry into a one-row DataFrame.

    Args:
        entry: Parsed bib entry. ``author`` is expected to be a list of name
            strings and ``editor`` a list of dicts with a ``name`` key.
        author_json: Author database handed to ``fix_author``.
        full_type_list: Entry types (``ENTRYTYPE`` values) that are accepted.
        index_number: Index label given to the single row.

    Returns:
        A DataFrame with the columns ``year``, ``title``, ``author``, ``doi``
        and ``journal``, or ``None`` when the entry has a type that is not
        accepted, lacks a title, lacks both year and date, or has no
        author/editor names.
    """
    # Check if everything is there
    if "ENTRYTYPE" not in entry or entry["ENTRYTYPE"] not in full_type_list:
        return None

    if "title" not in entry:
        return None

    if "year" not in entry and "date" not in entry:
        return None

    if "author" not in entry and "editor" not in entry:
        return None

    # Title (braces are dropped from the title)
    title = str(entry["title"]).strip().replace("{", "").replace("}", "")

    # Year: fall back to the part of "date" before the first "-".
    if "year" in entry:
        year = str(entry["year"]).strip()
    else:
        year = str(entry["date"]).split("-")[0].strip()

    # Authors: fall back to the editors when there is no author field.
    if "author" in entry:
        raw_names = entry["author"]
        if isinstance(raw_names, str):
            # A plain string would otherwise be processed character by
            # character; treat it as a single name.
            raw_names = [raw_names]
    else:
        raw_names = [editor["name"] for editor in entry["editor"]]

    # Build a new list so the caller's entry is not modified in place.
    authors = [fix_author(person, author_json) for person in raw_names]
    if not authors:
        # Nothing to show as author; skip the entry instead of failing later.
        return None
    author_string = combine_names(authors)

    # DOI
    doi: str = ""
    if "doi" in entry:
        doi = str(entry["doi"]).strip()

    # Journal name: the first field present wins. The fallbacks are only
    # consulted when the preceding fields are missing, so a real "journal"
    # value is no longer overwritten by e.g. "note" or "publisher".
    journal: str = ""
    for key in ("journal", "journaltitle", "booktitle", "note", "school", "publisher"):
        if key in entry:
            journal = str(entry[key]).strip()
            break

    journal = (
        journal.replace("\\textbackslash", "\\")
        .replace("Publication Title: ", "")
        .replace("\\&", "&")
    )

    record = {
        "year": year,
        "title": title,
        "author": author_string,
        "doi": doi,
        "journal": journal,
    }

    return pd.DataFrame(record, index=[index_number])
|
Loading…
Reference in a new issue