latex-ub/install_unibremen/docker/compose_cep/gitbridge/files/get_project.py
2025-05-07 17:38:35 +00:00

242 lines
6.8 KiB
Python

import argh # type: ignore
import shutil
import os
import subprocess
import glob
import docker # type: ignore
import pymongo
import bson
def get_user_id(
    username: str, container_mongo_name: str = "overleafmongo", port: int = 27017
) -> bson.objectid.ObjectId | None:
    """Return the ``_id`` of the user whose ``email`` field equals *username*.

    Args:
        username: Value matched against the ``email`` field of the
            ``users`` collection in the ``sharelatex`` database.
        container_mongo_name: Host passed to ``pymongo.MongoClient``.
        port: Port passed to ``pymongo.MongoClient``.

    Returns:
        The user's ObjectId, or None when no user document matches.
    """
    # The context manager closes the connection exactly once on every path
    # (match, no match, or an exception raised by the query).
    with pymongo.MongoClient(container_mongo_name, port) as client:
        # Only the _id is needed, so do not pull the whole user document
        # over the wire.
        selected_user = client.sharelatex.users.find_one(
            {"email": username}, projection={"_id": 1}
        )
    if selected_user is None:
        return None
    return selected_user["_id"]
def check_project_access(
    user_id: bson.objectid.ObjectId,
    project_id: str,
    container_mongo_name: str = "overleafmongo",
    port: int = 27017,
) -> bool:
    """
    Tell whether a user may access a given project.

    Args:
        user_id: The ObjectId of the user
        project_id: The string representation of the project's _id
        container_mongo_name: Host passed to pymongo.MongoClient
        port: MongoDB port

    Returns:
        bool: True when a project with that _id lists the user as owner,
        collaborator or read-only member. False otherwise, including when
        the lookup raises any exception (e.g. project_id is not a valid
        ObjectId string).
    """
    with pymongo.MongoClient(container_mongo_name, port) as mongo:
        try:
            # Any one of these membership clauses is enough to grant access.
            # NOTE(review): the spelling "collaberator_refs" is reproduced
            # exactly; it presumably mirrors the field name in Overleaf's
            # project schema -- verify before "correcting" it.
            membership = [
                {"owner_ref": user_id},
                {"collaberator_refs": user_id},
                {"readOnly_refs": user_id},
            ]
            criteria = {
                "_id": bson.objectid.ObjectId(project_id),
                "$or": membership,
            }
            # limit=1 lets the server stop counting at the first hit; only
            # existence matters here.
            matches = mongo.sharelatex.projects.count_documents(criteria, limit=1)
            return matches > 0
        except Exception:
            # Fail closed: every error is reported as "no access".
            return False
def get_container(container_name: str) -> None | docker.models.containers.Container:
    """Find the running container whose ``attrs["Name"]`` equals *container_name*.

    The comparison is exact, so the caller must supply the name in the same
    form Docker reports it (``main`` passes it with a leading ``/``).

    Returns:
        The container when exactly one running container matches,
        otherwise None (no match, or an ambiguous match).
    """
    docker_client = docker.from_env()
    candidates = [
        candidate
        for candidate in docker_client.containers.list()
        if candidate.attrs["Name"] == container_name
    ]
    return candidates[0] if len(candidates) == 1 else None
def clean_directory_except_git(directory_path: str) -> None:
    """
    Empty a directory while leaving its .git sub-directory untouched.

    Args:
        directory_path: Directory whose contents are removed. A path that
            does not exist is silently ignored.
    """
    if os.path.exists(directory_path):
        for name in os.listdir(directory_path):
            target = os.path.join(directory_path, name)
            target_is_dir = os.path.isdir(target)
            # Only a .git *directory* is preserved; a plain file that
            # happens to be called .git is deleted like any other file.
            if target_is_dir and name == ".git":
                continue
            if os.path.islink(target) or os.path.isfile(target):
                # Symlinks are unlinked, never followed into their target.
                os.unlink(target)
            elif target_is_dir:
                shutil.rmtree(target)
def main(
    username: str,
    project_id: str,
    container_mongo_name: str = "overleafmongo",
    container_overleaf_name: str = "overleafserver",
    overleaf_path: str = "/var/lib/overleaf/",
    host_path: str = "/downloads/",
    port: int = 27017,
) -> None:
    """Export one Overleaf project into a git working tree on the host.

    The user is looked up by e-mail and project access is verified. The
    project is then zipped inside the Overleaf container, copied to
    ``<host_path>/<username>/<project_id>.git/``, unpacked there (replacing
    everything except ``.git``) and committed. Validation, lookup, export
    and unpack failures terminate the process with exit status 1.

    Args:
        username: E-mail address identifying the Overleaf user.
        project_id: String form of the project's ObjectId.
        container_mongo_name: Host used to reach MongoDB.
        container_overleaf_name: Name of the running Overleaf container
            (without the leading ``/``).
        overleaf_path: Directory inside the Overleaf container where the
            export zip is written.
        host_path: Base directory on the host for the working trees.
        port: MongoDB port.
    """
    # Function-scope import so this block brings its own dependency along.
    import shlex

    # Truthiness covers both None and the empty string for either argument.
    if not project_id or not username:
        raise SystemExit(1)

    # Find user
    user_id: bson.objectid.ObjectId | None = get_user_id(
        username=username, container_mongo_name=container_mongo_name, port=port
    )
    if user_id is None:
        raise SystemExit(1)

    if not check_project_access(
        user_id=user_id,
        project_id=project_id,
        container_mongo_name=container_mongo_name,
        port=port,
    ):
        raise SystemExit(1)

    # get_container compares against attrs["Name"], hence the leading slash.
    docker_container: None | docker.models.containers.Container = get_container(
        "/" + container_overleaf_name
    )
    if docker_container is None:
        raise SystemExit(1)

    filename: str = f"{user_id}_{project_id}.zip"
    fullpath_container: str = os.path.join(overleaf_path, filename)
    onlypath_host: str = os.path.join(
        host_path,
        username,
        f"{project_id}.git",
    )
    fullpath_host: str = os.path.join(onlypath_host, filename)
    tarpath_host: str = f"{fullpath_host}.tar"

    # Create the archive of the project.
    # The command is handed over as an argument list and the interpolated
    # values are shell-quoted, so no value can break out of the bash -c script.
    export_script = (
        "cd /overleaf/services/web && "
        "node modules/server-ce-scripts/scripts/export-user-projects.mjs "
        f"--project-id {shlex.quote(project_id)} "
        f"--output {shlex.quote(fullpath_container)}"
    )
    result = docker_container.exec_run(["/bin/bash", "-c", export_script])
    if result.exit_code != 0:
        raise SystemExit(1)
    # NOTE(review): the exported zip is never removed from the container;
    # confirm whether it should be cleaned up after the copy.

    # At this point the file with the project is waiting for us.
    # get_archive hands it over wrapped in a tar stream.
    bits, _ = docker_container.get_archive(fullpath_container)

    os.makedirs(onlypath_host, mode=0o700, exist_ok=True)

    # Delete everything except the .git dir
    clean_directory_except_git(onlypath_host)

    # Write the tar stream to the host
    with open(tarpath_host, "wb") as f:
        for chunk in bits:
            f.write(chunk)

    # External tools are invoked with argument lists (no shell): username is
    # part of these paths and must never be interpreted by a shell.
    untar = subprocess.run(["tar", "-xf", tarpath_host], cwd=onlypath_host)
    os.unlink(tarpath_host)
    if untar.returncode != 0:
        raise SystemExit(1)

    unzip = subprocess.run(
        ["/usr/bin/unzip", "-qq", "-o", fullpath_host], cwd=onlypath_host
    )
    os.unlink(fullpath_host)
    # Info-ZIP unzip uses exit status 1 for warnings after a completed
    # extraction; anything above that is a real failure. Abort before the
    # commit so a broken archive is not recorded as an emptied project.
    if unzip.returncode > 1:
        raise SystemExit(1)

    subprocess.run(["chmod", "-R", "0755", onlypath_host])

    if not os.path.isdir(os.path.join(onlypath_host, ".git")):
        subprocess.run(["/usr/bin/git", "init", "-q"], cwd=onlypath_host)

    subprocess.run(["/usr/bin/git", "add", "--all"], cwd=onlypath_host)
    # The commit's exit status is deliberately ignored: git reports a
    # non-zero status when there is nothing new to commit.
    subprocess.run(
        ["/usr/bin/git", "commit", "-q", "-m", "by Overleaf CEP"],
        cwd=onlypath_host,
    )
    return
# Script entry point: argh derives the command-line interface from main()'s
# signature and calls main() with the parsed arguments.
if __name__ == "__main__":
    argh.dispatch_command(main)