+diff --git a/services/web/frontend/js/features/ui/components/bootstrap-5/navbar/default-navbar.tsx b/services/web/frontend/js/features/ui/components/bootstrap-5/navbar/default-navbar.tsx
+index 2480b7f061f..8e5429dbde6 100644
+--- a/services/web/frontend/js/features/ui/components/bootstrap-5/navbar/default-navbar.tsx
++++ b/services/web/frontend/js/features/ui/components/bootstrap-5/navbar/default-navbar.tsx
+@@ -1,4 +1,4 @@
+-import { useState } from 'react'
++import React, { useState } from 'react'
+ import { sendMB } from '@/infrastructure/event-tracking'
+ import { useTranslation } from 'react-i18next'
+ import { Button, Container, Nav, Navbar } from 'react-bootstrap'
+@@ -13,9 +13,15 @@ import MaterialIcon from '@/shared/components/material-icon'
+ import { useContactUsModal } from '@/shared/hooks/use-contact-us-modal'
+ import { UserProvider } from '@/shared/context/user-context'
+ import { X } from '@phosphor-icons/react'
++import overleafWhiteLogo from '@/shared/svgs/overleaf-white.svg'
++import overleafBlackLogo from '@/shared/svgs/overleaf-black.svg'
++import type { CSSPropertiesWithVariables } from '../../../../../../../types/css-properties-with-variables'
+
+-function DefaultNavbar(props: DefaultNavbarMetadata) {
++function DefaultNavbar(
++ props: DefaultNavbarMetadata & { overleafLogo?: string }
++) {
+ const {
++ overleafLogo,
+ customLogo,
+ title,
+ canDisplayAdminMenu,
+@@ -49,10 +55,20 @@ function DefaultNavbar(props: DefaultNavbarMetadata) {
+ className="navbar-default navbar-main"
+ expand="lg"
+ onToggle={expanded => setExpanded(expanded)}
++ style={
++ {
++ '--navbar-brand-image-default-url': `url("${overleafWhiteLogo}")`,
++ '--navbar-brand-image-redesign-url': `url("${overleafBlackLogo}")`,
++ } as CSSPropertiesWithVariables
++ }
+ >
+
+
+-
++
+ {enableUpgradeButton ? (
+
) {
++}: Pick & {
++ overleafLogo?: string
++}) {
+ const { appName } = getMeta('ol-ExposedSettings')
+-
+ if (customLogo) {
+ return (
+ // eslint-disable-next-line jsx-a11y/anchor-has-content
+@@ -24,9 +26,16 @@ export default function HeaderLogoOrTitle({
+
+ )
+ } else {
++ const style = overleafLogo
++ ? {
++ style: {
++ backgroundImage: `url("${overleafLogo}")`,
++ },
++ }
++ : null
+ return (
+ // eslint-disable-next-line jsx-a11y/anchor-has-content
+-
++
+ )
+ }
+ }
+diff --git a/services/web/frontend/js/shared/svgs/overleaf-black.svg b/services/web/frontend/js/shared/svgs/overleaf-black.svg
+new file mode 100644
+index 00000000000..ea0678438ba
+--- /dev/null
++++ b/services/web/frontend/js/shared/svgs/overleaf-black.svg
+@@ -0,0 +1,9 @@
++
++
++
++
++
++
++
++
++
+diff --git a/services/web/frontend/js/shared/svgs/overleaf-white.svg b/services/web/frontend/js/shared/svgs/overleaf-white.svg
+new file mode 100644
+index 00000000000..2ced81aa46d
+--- /dev/null
++++ b/services/web/frontend/js/shared/svgs/overleaf-white.svg
+@@ -0,0 +1 @@
++
+\ No newline at end of file
+diff --git a/services/web/frontend/stylesheets/bootstrap-5/components/nav.scss b/services/web/frontend/stylesheets/bootstrap-5/components/nav.scss
+index 5d28341cf53..dd0600ed15d 100644
+--- a/services/web/frontend/stylesheets/bootstrap-5/components/nav.scss
++++ b/services/web/frontend/stylesheets/bootstrap-5/components/nav.scss
+@@ -8,7 +8,10 @@
+ --navbar-padding-h: var(--spacing-05);
+ --navbar-padding: 0 var(--navbar-padding-h);
+ --navbar-brand-width: 130px;
+- --navbar-brand-image-url: url('../../../../public/img/ol-brand/overleaf-white.svg');
++ --navbar-brand-image-url: var(
++ --navbar-brand-image-default-url,
++ url('../../../../public/img/ol-brand/overleaf-white.svg')
++ );
+
+ // Title, when used instead of a logo
+ --navbar-title-font-size: var(--font-size-05);
+diff --git a/services/web/frontend/stylesheets/bootstrap-5/components/navbar.scss b/services/web/frontend/stylesheets/bootstrap-5/components/navbar.scss
+index 3b984bb6f36..a8855ea1ca3 100644
+--- a/services/web/frontend/stylesheets/bootstrap-5/components/navbar.scss
++++ b/services/web/frontend/stylesheets/bootstrap-5/components/navbar.scss
+@@ -216,7 +216,10 @@
+ .website-redesign .navbar-default {
+ --navbar-title-color: var(--content-primary);
+ --navbar-title-color-hover: var(--content-secondary);
+- --navbar-brand-image-url: url('../../../../public/img/ol-brand/overleaf-black.svg');
++ --navbar-brand-image-url: var(
++ --navbar-brand-image-redesign-url,
++ url('../../../../public/img/ol-brand/overleaf-black.svg')
++ );
+ --navbar-subdued-color: var(--content-primary);
+ --navbar-subdued-hover-bg: var(--bg-dark-primary);
+ --navbar-subdued-hover-color: var(--content-primary-dark);
+diff --git a/services/web/types/css-properties-with-variables.tsx b/services/web/types/css-properties-with-variables.tsx
+new file mode 100644
+index 00000000000..fe0e85902a6
+--- /dev/null
++++ b/services/web/types/css-properties-with-variables.tsx
+@@ -0,0 +1,4 @@
++import { CSSProperties } from 'react'
++
++export type CSSPropertiesWithVariables = CSSProperties &
++ Record<`--${string}`, number | string>
+--
+2.43.0
+
diff --git a/server-ce/hotfix/5.5.2/pr_26783.patch b/server-ce/hotfix/5.5.2/pr_26783.patch
new file mode 100644
index 0000000000..74db897a5f
--- /dev/null
+++ b/server-ce/hotfix/5.5.2/pr_26783.patch
@@ -0,0 +1,58 @@
+diff --git a/services/web/modules/server-ce-scripts/scripts/check-mongodb.mjs b/services/web/modules/server-ce-scripts/scripts/check-mongodb.mjs
+index 29f5e7ffd26..46be91a1d9c 100644
+--- a/services/web/modules/server-ce-scripts/scripts/check-mongodb.mjs
++++ b/services/web/modules/server-ce-scripts/scripts/check-mongodb.mjs
+@@ -9,6 +9,34 @@ const { ObjectId } = mongodb
+ const MIN_MONGO_VERSION = [6, 0]
+ const MIN_MONGO_FEATURE_COMPATIBILITY_VERSION = [6, 0]
+
++// Allow ignoring admin check failures via an environment variable
++const OVERRIDE_ENV_VAR_NAME = 'ALLOW_MONGO_ADMIN_CHECK_FAILURES'
++
++function shouldSkipAdminChecks() {
++ return process.env[OVERRIDE_ENV_VAR_NAME] === 'true'
++}
++
++function handleUnauthorizedError(err, feature) {
++ if (
++ err instanceof mongodb.MongoServerError &&
++ err.codeName === 'Unauthorized'
++ ) {
++ console.warn(`Warning: failed to check ${feature} (not authorised)`)
++ if (!shouldSkipAdminChecks()) {
++ console.error(
++ `Please ensure the MongoDB user has the required admin permissions, or\n` +
++ `set the environment variable ${OVERRIDE_ENV_VAR_NAME}=true to ignore this check.`
++ )
++ process.exit(1)
++ }
++ console.warn(
++ `Ignoring ${feature} check failure (${OVERRIDE_ENV_VAR_NAME}=${process.env[OVERRIDE_ENV_VAR_NAME]})`
++ )
++ } else {
++ throw err
++ }
++}
++
+ async function main() {
+ let mongoClient
+ try {
+@@ -18,8 +46,16 @@ async function main() {
+ throw err
+ }
+
+- await checkMongoVersion(mongoClient)
+- await checkFeatureCompatibilityVersion(mongoClient)
++ try {
++ await checkMongoVersion(mongoClient)
++ } catch (err) {
++ handleUnauthorizedError(err, 'MongoDB version')
++ }
++ try {
++ await checkFeatureCompatibilityVersion(mongoClient)
++ } catch (err) {
++ handleUnauthorizedError(err, 'MongoDB feature compatibility version')
++ }
+
+ try {
+ await testTransactions(mongoClient)
diff --git a/server-ce/init_scripts/100_set_docker_host_ipaddress.sh b/server-ce/init_scripts/100_set_docker_host_ipaddress.sh
index 0587a9b222..646b55ada7 100755
--- a/server-ce/init_scripts/100_set_docker_host_ipaddress.sh
+++ b/server-ce/init_scripts/100_set_docker_host_ipaddress.sh
@@ -2,4 +2,4 @@
set -e -o pipefail
# See the bottom of http://stackoverflow.com/questions/24319662/from-inside-of-a-docker-container-how-do-i-connect-to-the-localhost-of-the-mach
-echo "`route -n | awk '/UG[ \t]/{print $2}'` dockerhost" >> /etc/hosts
+echo "$(route -n | awk '/UG[ \t]/{print $2}') dockerhost" >> /etc/hosts
diff --git a/server-ce/init_scripts/200_nginx_config_template.sh b/server-ce/init_scripts/200_nginx_config_template.sh
index f652574c6f..f5707260ce 100755
--- a/server-ce/init_scripts/200_nginx_config_template.sh
+++ b/server-ce/init_scripts/200_nginx_config_template.sh
@@ -26,6 +26,7 @@ if [ -f "${nginx_template_file}" ]; then
# Note the single-quotes, they are important.
# This is a pass-list of env-vars that envsubst
# should operate on.
+ # shellcheck disable=SC2016
envsubst '
${NGINX_KEEPALIVE_TIMEOUT}
${NGINX_WORKER_CONNECTIONS}
diff --git a/server-ce/mongodb-init-replica-set.js b/server-ce/mongodb-init-replica-set.js
deleted file mode 100644
index 8d993774c7..0000000000
--- a/server-ce/mongodb-init-replica-set.js
+++ /dev/null
@@ -1 +0,0 @@
-rs.initiate({ _id: "overleaf", members: [ { _id: 0, host: "mongo:27017" } ] })
diff --git a/server-ce/nginx/clsi-nginx.conf b/server-ce/nginx/clsi-nginx.conf
index 94ce060706..aac976ecd8 100644
--- a/server-ce/nginx/clsi-nginx.conf
+++ b/server-ce/nginx/clsi-nginx.conf
@@ -30,7 +30,7 @@ server {
application/pdf pdf;
}
# handle output files for specific users
- location ~ ^/project/([0-9a-f]+)/user/([0-9a-f]+)/build/([0-9a-f-]+)/output/output\.([a-z]+)$ {
+ location ~ ^/project/([0-9a-f]+)/user/([0-9a-f]+)/build/([0-9a-f-]+)/output/output\.([a-z.]+)$ {
alias /var/lib/overleaf/data/output/$1-$2/generated-files/$3/output.$4;
}
# handle .blg files for specific users
@@ -38,7 +38,7 @@ server {
alias /var/lib/overleaf/data/output/$1-$2/generated-files/$3/$4.blg;
}
# handle output files for anonymous users
- location ~ ^/project/([0-9a-f]+)/build/([0-9a-f-]+)/output/output\.([a-z]+)$ {
+ location ~ ^/project/([0-9a-f]+)/build/([0-9a-f-]+)/output/output\.([a-z.]+)$ {
alias /var/lib/overleaf/data/output/$1/generated-files/$2/output.$3;
}
# handle .blg files for anonymous users
diff --git a/server-ce/nginx/overleaf.conf b/server-ce/nginx/overleaf.conf
index 78af603c1e..77e59df5a0 100644
--- a/server-ce/nginx/overleaf.conf
+++ b/server-ce/nginx/overleaf.conf
@@ -47,12 +47,12 @@ server {
}
# handle output files for specific users
- location ~ ^/project/([0-9a-f]+)/user/([0-9a-f]+)/build/([0-9a-f-]+)/output/output\.([a-z]+)$ {
+ location ~ ^/project/([0-9a-f]+)/user/([0-9a-f]+)/build/([0-9a-f-]+)/output/output\.([a-z.]+)$ {
proxy_pass http://127.0.0.1:8080; # clsi-nginx.conf
proxy_http_version 1.1;
}
# handle output files for anonymous users
- location ~ ^/project/([0-9a-f]+)/build/([0-9a-f-]+)/output/output\.([a-z]+)$ {
+ location ~ ^/project/([0-9a-f]+)/build/([0-9a-f-]+)/output/output\.([a-z.]+)$ {
proxy_pass http://127.0.0.1:8080; # clsi-nginx.conf
proxy_http_version 1.1;
}
diff --git a/server-ce/runit/clsi-overleaf/run b/server-ce/runit/clsi-overleaf/run
index cb365ec75b..ece2031769 100755
--- a/server-ce/runit/clsi-overleaf/run
+++ b/server-ce/runit/clsi-overleaf/run
@@ -11,7 +11,7 @@ fi
if [ -e '/var/run/docker.sock' ]; then
echo ">> Setting permissions on docker socket"
DOCKER_GROUP=$(stat -c '%g' /var/run/docker.sock)
- groupadd --non-unique --gid ${DOCKER_GROUP} dockeronhost
+ groupadd --non-unique --gid "${DOCKER_GROUP}" dockeronhost
usermod -aG dockeronhost www-data
fi
diff --git a/server-ce/runit/references-overleaf/run b/server-ce/runit/references-overleaf/run
new file mode 100755
index 0000000000..875023df9f
--- /dev/null
+++ b/server-ce/runit/references-overleaf/run
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+NODE_PARAMS=""
+if [ "$DEBUG_NODE" == "true" ]; then
+ echo "running debug - references"
+ NODE_PARAMS="--inspect=0.0.0.0:30560"
+fi
+
+source /etc/overleaf/env.sh
+export LISTEN_ADDRESS=127.0.0.1
+
+exec /sbin/setuser www-data /usr/bin/node $NODE_PARAMS /overleaf/services/references/app.js >> /var/log/overleaf/references.log 2>&1
diff --git a/server-ce/services.js b/server-ce/services.js
index d0b0a9c076..e0282f3bad 100644
--- a/server-ce/services.js
+++ b/server-ce/services.js
@@ -29,6 +29,9 @@ module.exports = [
{
name: 'project-history',
},
+ {
+ name: 'references',
+ },
{
name: 'history-v1',
},
diff --git a/server-ce/test/Dockerfile b/server-ce/test/Dockerfile
index cbdf2f36b8..7cc86f7ff9 100644
--- a/server-ce/test/Dockerfile
+++ b/server-ce/test/Dockerfile
@@ -1,4 +1,4 @@
-FROM node:20.18.0
+FROM node:22.17.0
RUN curl -fsSL https://download.docker.com/linux/debian/gpg | apt-key add - \
&& echo \
"deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/debian $(. /etc/os-release && echo "$VERSION_CODENAME") stable" \
diff --git a/server-ce/test/Makefile b/server-ce/test/Makefile
index 1671b9f986..fb7c980293 100644
--- a/server-ce/test/Makefile
+++ b/server-ce/test/Makefile
@@ -6,8 +6,8 @@ all: test-e2e
# Notable the container labels com.docker.compose.project.working_dir and com.docker.compose.project.config_files need to match when creating containers from the docker host (how you started things) and from host-admin (how tests reconfigure the instance).
export PWD = $(shell pwd)
-export TEX_LIVE_DOCKER_IMAGE ?= gcr.io/overleaf-ops/texlive-full:2023.1
-export ALL_TEX_LIVE_DOCKER_IMAGES ?= gcr.io/overleaf-ops/texlive-full:2023.1,gcr.io/overleaf-ops/texlive-full:2022.1
+export TEX_LIVE_DOCKER_IMAGE ?= us-east1-docker.pkg.dev/overleaf-ops/ol-docker/texlive-full:2023.1
+export ALL_TEX_LIVE_DOCKER_IMAGES ?= us-east1-docker.pkg.dev/overleaf-ops/ol-docker/texlive-full:2023.1,us-east1-docker.pkg.dev/overleaf-ops/ol-docker/texlive-full:2022.1
export IMAGE_TAG_PRO ?= us-east1-docker.pkg.dev/overleaf-ops/ol-docker/pro:latest
export CYPRESS_SHARD ?=
export COMPOSE_PROJECT_NAME ?= test
@@ -20,9 +20,12 @@ test-e2e-native:
npm run cypress:open
test-e2e:
+ docker compose build host-admin
+ docker compose up -d host-admin
docker compose up --no-log-prefix --exit-code-from=e2e e2e
test-e2e-open:
+ docker compose up -d host-admin
docker compose up --no-log-prefix --exit-code-from=e2e-open e2e-open
clean:
@@ -44,8 +47,8 @@ prefetch_custom_compose_pull:
prefetch_custom: prefetch_custom_texlive
prefetch_custom_texlive:
- echo -n "$$ALL_TEX_LIVE_DOCKER_IMAGES" | xargs -d, -I% \
- sh -exc 'tag=%; re_tag=quay.io/sharelatex/$${tag#*/*/}; docker pull $$tag; docker tag $$tag $$re_tag'
+ echo "$$ALL_TEX_LIVE_DOCKER_IMAGES" | tr ',' '\n' | xargs -I% \
+ sh -exc 'tag=%; re_tag=quay.io/sharelatex/$${tag#*/*/*/}; docker pull $$tag; docker tag $$tag $$re_tag'
prefetch_custom: prefetch_old
prefetch_old:
diff --git a/server-ce/test/accounts.spec.ts b/server-ce/test/accounts.spec.ts
index eeeb104087..85d545535a 100644
--- a/server-ce/test/accounts.spec.ts
+++ b/server-ce/test/accounts.spec.ts
@@ -9,7 +9,7 @@ describe('Accounts', function () {
it('can log in and out', function () {
login('user@example.com')
cy.visit('/project')
- cy.findByText('Account').click()
+ cy.findByRole('menuitem', { name: 'Account' }).click()
cy.findByText('Log Out').click()
cy.url().should('include', '/login')
cy.visit('/project')
diff --git a/server-ce/test/admin.spec.ts b/server-ce/test/admin.spec.ts
index 7a982bf672..50a89fb855 100644
--- a/server-ce/test/admin.spec.ts
+++ b/server-ce/test/admin.spec.ts
@@ -127,10 +127,12 @@ describe('admin panel', function () {
testProjectName = `project-${uuid()}`
deletedProjectName = `deleted-project-${uuid()}`
login(user1)
- cy.visit('/project')
- createProject(testProjectName).then(id => (testProjectId = id))
- cy.visit('/project')
- createProject(deletedProjectName).then(id => (projectToDeleteId = id))
+ createProject(testProjectName, { open: false }).then(
+ id => (testProjectId = id)
+ )
+ createProject(deletedProjectName, { open: false }).then(
+ id => (projectToDeleteId = id)
+ )
})
describe('manage site', () => {
@@ -177,6 +179,21 @@ describe('admin panel', function () {
cy.get('nav').findByText('Manage Users').click()
})
+ it('displays expected tabs', () => {
+ const tabs = ['Users', 'License Usage']
+ cy.get('[role="tab"]').each((el, index) => {
+ cy.wrap(el).findByText(tabs[index]).click()
+ })
+ cy.get('[role="tab"]').should('have.length', tabs.length)
+ })
+
+ it('license usage tab', () => {
+ cy.get('a').contains('License Usage').click()
+ cy.findByText(
+ 'An active user is one who has opened a project in this Server Pro instance in the last 12 months.'
+ )
+ })
+
describe('create users', () => {
beforeEach(() => {
cy.get('a').contains('New User').click()
@@ -291,8 +308,8 @@ describe('admin panel', function () {
cy.findByText(deletedProjectName).should('not.exist')
cy.log('navigate to thrashed projects and delete the project')
- cy.get('.project-list-sidebar-react').within(() => {
- cy.findByText('Trashed Projects').click()
+ cy.get('.project-list-sidebar-scroll').within(() => {
+ cy.findByText('Trashed projects').click()
})
findProjectRow(deletedProjectName).within(() =>
cy.findByRole('button', { name: 'Delete' }).click()
@@ -316,8 +333,8 @@ describe('admin panel', function () {
cy.log('login as the user and verify the project is restored')
login(user1)
cy.visit('/project')
- cy.get('.project-list-sidebar-react').within(() => {
- cy.findByText('Trashed Projects').click()
+ cy.get('.project-list-sidebar-scroll').within(() => {
+ cy.findByText('Trashed projects').click()
})
cy.findByText(`${deletedProjectName} (Restored)`)
})
diff --git a/server-ce/test/create-and-compile-project.spec.ts b/server-ce/test/create-and-compile-project.spec.ts
index 1bfcfa999a..a0e03fe8d0 100644
--- a/server-ce/test/create-and-compile-project.spec.ts
+++ b/server-ce/test/create-and-compile-project.spec.ts
@@ -1,5 +1,8 @@
import { ensureUserExists, login } from './helpers/login'
-import { createProject } from './helpers/project'
+import {
+ createProject,
+ openProjectViaInviteNotification,
+} from './helpers/project'
import { isExcludedBySharding, startWith } from './helpers/config'
import { throttledRecompile } from './helpers/compile'
@@ -11,10 +14,7 @@ describe('Project creation and compilation', function () {
it('users can create project and compile it', function () {
login('user@example.com')
- cy.visit('/project')
- // this is the first project created, the welcome screen is displayed instead of the project list
createProject('test-project')
- cy.url().should('match', /\/project\/[a-fA-F0-9]{24}/)
const recompile = throttledRecompile()
cy.findByText('\\maketitle').parent().click()
cy.findByText('\\maketitle').parent().type('\n\\section{{}Test Section}')
@@ -26,8 +26,8 @@ describe('Project creation and compilation', function () {
const fileName = `test-${Date.now()}.md`
const markdownContent = '# Markdown title'
login('user@example.com')
- cy.visit('/project')
createProject('test-project')
+
// FIXME: Add aria-label maybe? or at least data-test-id
cy.findByText('New file').click({ force: true })
cy.findByRole('dialog').within(() => {
@@ -40,9 +40,15 @@ describe('Project creation and compilation', function () {
cy.get('.cm-line').should('have.length', 1)
cy.get('.cm-line').type(markdownContent)
cy.findByText('main.tex').click()
- cy.get('.cm-content').should('contain.text', '\\maketitle')
+ cy.findByRole('textbox', { name: /Source Editor editing/i }).should(
+ 'contain.text',
+ '\\maketitle'
+ )
cy.findByText(fileName).click()
- cy.get('.cm-content').should('contain.text', markdownContent)
+ cy.findByRole('textbox', { name: /Source Editor editing/i }).should(
+ 'contain.text',
+ markdownContent
+ )
})
it('can link and display linked image from other project', function () {
@@ -50,12 +56,10 @@ describe('Project creation and compilation', function () {
const targetProjectName = `${sourceProjectName}-target`
login('user@example.com')
- cy.visit('/project')
- createProject(sourceProjectName, { type: 'Example Project' }).as(
- 'sourceProjectId'
- )
-
- cy.visit('/project')
+ createProject(sourceProjectName, {
+ type: 'Example project',
+ open: false,
+ }).as('sourceProjectId')
createProject(targetProjectName)
// link the image from `projectName` into this project
@@ -80,13 +84,10 @@ describe('Project creation and compilation', function () {
const sourceProjectName = `test-project-${Date.now()}`
const targetProjectName = `${sourceProjectName}-target`
login('user@example.com')
-
- cy.visit('/project')
- createProject(sourceProjectName, { type: 'Example Project' }).as(
- 'sourceProjectId'
- )
-
- cy.visit('/project')
+ createProject(sourceProjectName, {
+ type: 'Example project',
+ open: false,
+ }).as('sourceProjectId')
createProject(targetProjectName).as('targetProjectId')
// link the image from `projectName` into this project
@@ -100,24 +101,15 @@ describe('Project creation and compilation', function () {
cy.findByText('Share').click()
cy.findByRole('dialog').within(() => {
- cy.get('input').type('collaborator@example.com,')
- cy.findByText('Share').click({ force: true })
+ cy.findByTestId('collaborator-email-input').type(
+ 'collaborator@example.com,'
+ )
+ cy.findByText('Invite').click({ force: true })
+ cy.findByText('Invite not yet accepted.')
})
- cy.visit('/project')
- cy.findByText('Account').click()
- cy.findByText('Log Out').click()
-
login('collaborator@example.com')
- cy.visit('/project')
- cy.findByText(targetProjectName)
- .parent()
- .parent()
- .within(() => {
- cy.findByText('Join Project').click()
- })
- cy.findByText('Open Project').click()
- cy.url().should('match', /\/project\/[a-fA-F0-9]{24}/)
+ openProjectViaInviteNotification(targetProjectName)
cy.get('@targetProjectId').then(targetProjectId => {
cy.url().should('include', targetProjectId)
})
diff --git a/server-ce/test/docker-compose.yml b/server-ce/test/docker-compose.yml
index ee97a6cb01..d16c5e2b71 100644
--- a/server-ce/test/docker-compose.yml
+++ b/server-ce/test/docker-compose.yml
@@ -20,7 +20,7 @@ services:
OVERLEAF_EMAIL_SMTP_HOST: 'mailtrap'
OVERLEAF_EMAIL_SMTP_PORT: '25'
OVERLEAF_EMAIL_SMTP_IGNORE_TLS: 'true'
- ENABLED_LINKED_FILE_TYPES: 'project_file,project_output_file'
+ ENABLED_LINKED_FILE_TYPES: 'project_file,project_output_file,url'
ENABLE_CONVERSIONS: 'true'
EMAIL_CONFIRMATION_DISABLED: 'true'
healthcheck:
@@ -35,10 +35,10 @@ services:
MAILTRAP_PASSWORD: 'password-for-mailtrap'
mongo:
- image: mongo:5.0.17
+ image: mongo:8.0.11
command: '--replSet overleaf'
volumes:
- - ../mongodb-init-replica-set.js:/docker-entrypoint-initdb.d/mongodb-init-replica-set.js
+ - ../bin/shared/mongodb-init-replica-set.js:/docker-entrypoint-initdb.d/mongodb-init-replica-set.js
environment:
MONGO_INITDB_DATABASE: sharelatex
extra_hosts:
@@ -46,7 +46,7 @@ services:
# This override is not needed when running the setup after starting up mongo.
- mongo:127.0.0.1
healthcheck:
- test: echo 'db.stats().ok' | mongo localhost:27017/test --quiet
+ test: echo 'db.stats().ok' | mongosh localhost:27017/test --quiet
interval: 3s
timeout: 3s
retries: 30
@@ -91,6 +91,7 @@ services:
volumes:
- ./:/e2e
- /tmp/.X11-unix:/tmp/.X11-unix
+ - ${XAUTHORITY:-/dev/null}:/home/node/.Xauthority
user: "${DOCKER_USER:-1000:1000}"
environment:
CYPRESS_SHARD:
@@ -131,7 +132,7 @@ services:
saml:
restart: always
- image: gcr.io/overleaf-ops/saml-test
+ image: us-east1-docker.pkg.dev/overleaf-ops/ol-docker/saml-test
environment:
SAML_TEST_SP_ENTITY_ID: 'sharelatex-test-saml'
SAML_BASE_URL_PATH: 'http://saml/simplesaml/'
diff --git a/server-ce/test/editor.spec.ts b/server-ce/test/editor.spec.ts
index 4baef39f5b..3e57b94f8f 100644
--- a/server-ce/test/editor.spec.ts
+++ b/server-ce/test/editor.spec.ts
@@ -1,7 +1,14 @@
-import { createProject } from './helpers/project'
+import {
+ createNewFile,
+ createProject,
+ openProjectById,
+ testNewFileUpload,
+} from './helpers/project'
import { isExcludedBySharding, startWith } from './helpers/config'
import { ensureUserExists, login } from './helpers/login'
import { v4 as uuid } from 'uuid'
+import { beforeWithReRunOnTestRetry } from './helpers/beforeWithReRunOnTestRetry'
+import { prepareWaitForNextCompileSlot } from './helpers/compile'
describe('editor', () => {
if (isExcludedBySharding('PRO_DEFAULT_1')) return
@@ -9,187 +16,80 @@ describe('editor', () => {
ensureUserExists({ email: 'user@example.com' })
ensureUserExists({ email: 'collaborator@example.com' })
- it('word dictionary and spelling', () => {
- const fileName = 'test.tex'
- const word = createRandomLetterString()
+ let projectName: string
+ let projectId: string
+ let recompile: () => void
+ let waitForCompileRateLimitCoolOff: (fn: () => void) => void
+ beforeWithReRunOnTestRetry(function () {
+ projectName = `project-${uuid()}`
login('user@example.com')
- cy.visit('/project')
- createProject('test-project')
-
- cy.log('create new project file')
- cy.get('button').contains('New file').click({ force: true })
- cy.findByRole('dialog').within(() => {
- cy.get('input').clear()
- cy.get('input').type(fileName)
- cy.findByText('Create').click()
- })
- cy.findByText(fileName).click()
-
- cy.log('edit project file')
- // wait until we've switched to the newly created empty file
- cy.get('.cm-line').should('have.length', 1)
- cy.get('.cm-line').type(word)
-
- cy.get('.ol-cm-spelling-error').should('exist')
-
- cy.log('change project language')
- cy.get('button').contains('Menu').click()
- cy.get('select[id=settings-menu-spellCheckLanguage]').select('Spanish')
- cy.get('[id="left-menu"]').type('{esc}') // close left menu
-
- cy.log('add word to dictionary')
- cy.get('.ol-cm-spelling-error').contains(word).rightclick()
- cy.findByText('Add to Dictionary').click()
- cy.get('.ol-cm-spelling-error').should('not.exist')
-
- cy.log('remove word from dictionary')
- cy.get('button').contains('Menu').click()
- cy.get('button').contains('Edit').click()
- cy.get('[id="dictionary-modal"').within(() => {
- cy.findByText(word)
- .parent()
- .within(() => cy.get('button').click())
-
- // the modal has 2 close buttons, this ensures the one with the visible label is
- // clicked, otherwise it would need `force: true`
- cy.get('.btn').contains('Close').click()
- })
-
- cy.log('close left panel')
- cy.get('[id="left-menu"]').type('{esc}')
-
- cy.log('rewrite word to force spelling error')
- cy.get('.cm-line').type('{selectAll}{del}' + word + '{enter}')
-
- cy.get('.ol-cm-spelling-error').should('contain.text', word)
+ createProject(projectName, { type: 'Example project', open: false }).then(
+ id => (projectId = id)
+ )
+ ;({ recompile, waitForCompileRateLimitCoolOff } =
+ prepareWaitForNextCompileSlot())
})
- describe('collaboration', () => {
- let projectId: string
+ beforeEach(() => {
+ login('user@example.com')
+ waitForCompileRateLimitCoolOff(() => {
+ openProjectById(projectId)
+ })
+ })
- beforeEach(() => {
- login('user@example.com')
- cy.visit(`/project`)
- createProject('test-editor', { type: 'Example Project' }).then(
- (id: string) => {
- projectId = id
+ describe('spelling', function () {
+ function changeSpellCheckLanguageTo(lng: string) {
+ cy.log(`change project language to '${lng}'`)
+ cy.get('button').contains('Menu').click()
+ cy.get('select[id=settings-menu-spellCheckLanguage]').select(lng)
+ cy.get('[id="left-menu"]').type('{esc}') // close left menu
+ }
- cy.log('make project shareable')
- cy.findByText('Share').click()
- cy.findByText('Turn on link sharing').click()
-
- cy.log('accept project invitation')
- cy.findByText('Anyone with this link can edit this project')
- .next()
- .should('contain.text', 'http://') // wait for the link to appear
- .then(el => {
- const linkSharingReadAndWrite = el.text()
- login('collaborator@example.com')
- cy.visit(linkSharingReadAndWrite)
- cy.get('button').contains('Join Project').click()
- cy.log(
- 'navigate to project dashboard to avoid cross session requests from editor'
- )
- cy.visit('/project')
- })
-
- login('user@example.com')
- cy.visit(`/project/${projectId}`)
- }
- )
+ afterEach(function () {
+ changeSpellCheckLanguageTo('Off')
})
- it('track-changes', () => {
- cy.log('enable track-changes for everyone')
- cy.findByText('Review').click()
- cy.get('.review-panel-toolbar-collapse-button').click() // make track-changes switches visible
+ it('word dictionary and spelling', () => {
+ changeSpellCheckLanguageTo('English (American)')
+ createNewFile()
+ const word = createRandomLetterString()
- cy.intercept('POST', '**/track_changes').as('enableTrackChanges')
- cy.findByText('Everyone')
- .parent()
- .within(() => cy.get('.input-switch').click())
- cy.wait('@enableTrackChanges')
+ cy.log('edit project file')
+ cy.get('.cm-line').type(word)
- login('collaborator@example.com')
- cy.visit(`/project/${projectId}`)
+ cy.get('.ol-cm-spelling-error').should('exist')
- cy.log('make changes in main file')
- // cy.type() "clicks" in the center of the selected element before typing. This "click" discards the text as selected by the dblclick.
- // Go down to the lower level event based typing, the frontend tests in web use similar events.
- cy.get('.cm-editor').as('editor')
- cy.get('@editor').findByText('\\maketitle').dblclick()
- cy.get('@editor').trigger('keydown', { key: 'Delete' })
- cy.get('@editor').trigger('keydown', { key: 'Enter' })
- cy.get('@editor').trigger('keydown', { key: 'Enter' })
+ changeSpellCheckLanguageTo('Spanish')
- cy.log('recompile to force flush')
- cy.findByText('Recompile').click()
+ cy.log('add word to dictionary')
+ cy.get('.ol-cm-spelling-error').contains(word).rightclick()
+ cy.findByText('Add to dictionary').click()
+ cy.get('.ol-cm-spelling-error').should('not.exist')
- login('user@example.com')
- cy.visit(`/project/${projectId}`)
+ cy.log('remove word from dictionary')
+ cy.get('button').contains('Menu').click()
+ cy.get('button#dictionary-settings').contains('Edit').click()
+ cy.get('[id="dictionary-modal"]').within(() => {
+ cy.findByText(word)
+ .parent()
+ .within(() => cy.get('button').click())
- cy.log('reject changes')
- cy.findByText('Review').click()
- cy.get('.cm-content').should('not.contain.text', '\\maketitle')
- cy.findByText('Reject').click({ force: true })
+ // the modal has 2 close buttons, this ensures the one with the visible label is
+ // clicked, otherwise it would need `force: true`
+ cy.get('.btn').contains('Close').click()
+ })
- cy.log('verify the changes are applied')
- cy.get('.cm-content').should('contain.text', '\\maketitle')
- })
+ cy.log('close left panel')
+ cy.get('[id="left-menu"]').type('{esc}')
- it('track-changes rich text', () => {
- cy.log('enable track-changes for everyone')
- cy.findByText('Visual Editor').click()
- cy.findByText('Review').click()
- cy.get('.review-panel-toolbar-collapse-button').click() // make track-changes switches visible
+ cy.log('rewrite word to force spelling error')
+ cy.get('.cm-line').type('{selectAll}{del}' + word + '{enter}')
- cy.intercept('POST', '**/track_changes').as('enableTrackChanges')
- cy.findByText('Everyone')
- .parent()
- .within(() => cy.get('.input-switch').click())
- cy.wait('@enableTrackChanges')
-
- login('collaborator@example.com')
- cy.visit(`/project/${projectId}`)
-
- cy.log('enable visual editor and make changes in main file')
- cy.findByText('Visual Editor').click()
-
- // cy.type() "clicks" in the center of the selected element before typing. This "click" discards the text as selected by the dblclick.
- // Go down to the lower level event based typing, the frontend tests in web use similar events.
- cy.get('.cm-editor').as('editor')
- cy.get('@editor').contains('Introduction').dblclick()
- cy.get('@editor').trigger('keydown', { key: 'Delete' })
- cy.get('@editor').trigger('keydown', { key: 'Enter' })
- cy.get('@editor').trigger('keydown', { key: 'Enter' })
-
- cy.log('recompile to force flush')
- cy.findByText('Recompile').click()
-
- login('user@example.com')
- cy.visit(`/project/${projectId}`)
-
- cy.log('reject changes')
- cy.findByText('Review').click()
- cy.get('.cm-content').should('not.contain.text', 'Introduction')
- cy.findAllByText('Reject').first().click({ force: true })
-
- cy.log('verify the changes are applied in the visual editor')
- cy.findByText('Visual Editor').click()
- cy.get('.cm-content').should('contain.text', 'Introduction')
+ cy.get('.ol-cm-spelling-error').should('contain.text', word)
})
})
describe('editor', () => {
- beforeEach(() => {
- login('user@example.com')
- cy.visit(`/project`)
- createProject(`project-${uuid()}`, { type: 'Example Project' })
- // wait until the main document is rendered
- cy.findByText(/Loading/).should('not.exist')
- cy.findByText(/Your Paper/)
- })
-
it('renders jpg', () => {
cy.findByTestId('file-tree').findByText('frog.jpg').click()
cy.get('[alt="frog.jpg"]')
@@ -199,40 +99,28 @@ describe('editor', () => {
})
it('symbol palette', () => {
+ createNewFile()
+
cy.get('button[aria-label="Toggle Symbol Palette"]').click({
force: true,
})
cy.get('button').contains('𝜉').click()
- cy.get('.cm-content').should('contain.text', '\\xi')
+ cy.findByRole('textbox', { name: /Source Editor editing/i }).should(
+ 'contain.text',
+ '\\xi'
+ )
+
+ cy.log('recompile to force flush and avoid "unsaved changes" prompt')
+ recompile()
})
})
describe('add new file to project', () => {
- let projectName: string
-
beforeEach(() => {
- projectName = `project-${uuid()}`
- login('user@example.com')
- cy.visit(`/project`)
- createProject(projectName, { type: 'Example Project' })
cy.get('button').contains('New file').click({ force: true })
})
- it('can upload file', () => {
- cy.get('button').contains('Upload').click({ force: true })
- cy.get('input[type=file]')
- .first()
- .selectFile(
- {
- contents: Cypress.Buffer.from('Test File Content'),
- fileName: 'file.txt',
- lastModified: Date.now(),
- },
- { force: true }
- )
- cy.findByTestId('file-tree').findByText('file.txt').click({ force: true })
- cy.findByText('Test File Content')
- })
+ testNewFileUpload()
it('should not display import from URL', () => {
cy.findByText('From external URL').should('not.exist')
@@ -240,20 +128,15 @@ describe('editor', () => {
})
describe('left menu', () => {
- let projectName: string
-
beforeEach(() => {
- projectName = `project-${uuid()}`
- login('user@example.com')
- cy.visit(`/project`)
- createProject(projectName, { type: 'Example Project' })
cy.get('button').contains('Menu').click()
})
it('can download project sources', () => {
cy.get('a').contains('Source').click()
+ const zipName = projectName.replaceAll('-', '_')
cy.task('readFileInZip', {
- pathToZip: `cypress/downloads/${projectName}.zip`,
+ pathToZip: `cypress/downloads/${zipName}.zip`,
fileToRead: 'main.tex',
}).should('contain', 'Your introduction goes here')
})
@@ -292,13 +175,6 @@ describe('editor', () => {
})
describe('layout selector', () => {
- let projectId: string
- beforeEach(() => {
- login('user@example.com')
- cy.visit(`/project`)
- createProject(`project-${uuid()}`, { type: 'Example Project' })
- })
-
it('show editor only and switch between editor and pdf', () => {
cy.get('.pdf-viewer').should('be.visible')
cy.get('.cm-editor').should('be.visible')
diff --git a/server-ce/test/external-auth.spec.ts b/server-ce/test/external-auth.spec.ts
index 7e71ab9777..f26947e8a8 100644
--- a/server-ce/test/external-auth.spec.ts
+++ b/server-ce/test/external-auth.spec.ts
@@ -32,6 +32,9 @@ describe('SAML', () => {
cy.get('button[type="submit"]').click()
})
+ cy.log('wait for login to finish')
+ cy.url().should('contain', '/project')
+
createProject('via SAML')
})
})
@@ -62,6 +65,9 @@ describe('LDAP', () => {
cy.get('input[name="password"]').type('fry')
cy.get('button[type="submit"]').click()
+ cy.log('wait for login to finish')
+ cy.url().should('contain', '/project')
+
createProject('via LDAP')
})
})
diff --git a/server-ce/test/filestore-migration.spec.ts b/server-ce/test/filestore-migration.spec.ts
new file mode 100644
index 0000000000..25875ad374
--- /dev/null
+++ b/server-ce/test/filestore-migration.spec.ts
@@ -0,0 +1,104 @@
+import { ensureUserExists, login } from './helpers/login'
+import {
+ createProject,
+ openProjectById,
+ prepareFileUploadTest,
+} from './helpers/project'
+import { isExcludedBySharding, startWith } from './helpers/config'
+import { prepareWaitForNextCompileSlot } from './helpers/compile'
+import { beforeWithReRunOnTestRetry } from './helpers/beforeWithReRunOnTestRetry'
+import { v4 as uuid } from 'uuid'
+import { purgeFilestoreData, runScript } from './helpers/hostAdminClient'
+
+describe('filestore migration', function () {
+ if (isExcludedBySharding('CE_CUSTOM_3')) return
+ startWith({ withDataDir: true, resetData: true, vars: {} })
+ ensureUserExists({ email: 'user@example.com' })
+
+ let projectName: string
+ let projectId: string
+ let waitForCompileRateLimitCoolOff: (fn: () => void) => void
+ const previousBinaryFiles: (() => void)[] = []
+ beforeWithReRunOnTestRetry(function () {
+ projectName = `project-${uuid()}`
+ login('user@example.com')
+ createProject(projectName, { type: 'Example project' }).then(
+ id => (projectId = id)
+ )
+ let queueReset
+ ;({ waitForCompileRateLimitCoolOff, queueReset } =
+ prepareWaitForNextCompileSlot())
+ queueReset()
+ previousBinaryFiles.push(prepareFileUploadTest(true))
+ })
+
+ beforeEach(() => {
+ login('user@example.com')
+ waitForCompileRateLimitCoolOff(() => {
+ openProjectById(projectId)
+ })
+ })
+
+ function checkFilesAreAccessible() {
+ it('can upload new binary file and read previous uploads', function () {
+ previousBinaryFiles.push(prepareFileUploadTest(true))
+ for (const check of previousBinaryFiles) {
+ check()
+ }
+ })
+
+ it('renders frog jpg', () => {
+ cy.findByTestId('file-tree').findByText('frog.jpg').click()
+ cy.get('[alt="frog.jpg"]')
+ .should('be.visible')
+ .and('have.prop', 'naturalWidth')
+ .should('be.greaterThan', 0)
+ })
+ }
+
+ describe('OVERLEAF_FILESTORE_MIGRATION_LEVEL not set', function () {
+ startWith({ withDataDir: true, vars: {} })
+ checkFilesAreAccessible()
+ })
+
+ describe('OVERLEAF_FILESTORE_MIGRATION_LEVEL=0', function () {
+ startWith({
+ withDataDir: true,
+ vars: { OVERLEAF_FILESTORE_MIGRATION_LEVEL: '0' },
+ })
+ checkFilesAreAccessible()
+
+ describe('OVERLEAF_FILESTORE_MIGRATION_LEVEL=1', function () {
+ startWith({
+ withDataDir: true,
+ vars: { OVERLEAF_FILESTORE_MIGRATION_LEVEL: '1' },
+ })
+ checkFilesAreAccessible()
+
+ describe('OVERLEAF_FILESTORE_MIGRATION_LEVEL=2', function () {
+ startWith({
+ withDataDir: true,
+ vars: { OVERLEAF_FILESTORE_MIGRATION_LEVEL: '1' },
+ })
+ before(async function () {
+ await runScript({
+ cwd: 'services/history-v1',
+ script: 'storage/scripts/back_fill_file_hash.mjs',
+ })
+ })
+ startWith({
+ withDataDir: true,
+ vars: { OVERLEAF_FILESTORE_MIGRATION_LEVEL: '2' },
+ })
+ checkFilesAreAccessible()
+
+ describe('purge filestore data', function () {
+ before(async function () {
+ await purgeFilestoreData()
+ })
+ checkFilesAreAccessible()
+ })
+ })
+ })
+ })
+})
diff --git a/server-ce/test/git-bridge.spec.ts b/server-ce/test/git-bridge.spec.ts
index ee2aff41ed..1f114574ac 100644
--- a/server-ce/test/git-bridge.spec.ts
+++ b/server-ce/test/git-bridge.spec.ts
@@ -4,6 +4,8 @@ import { ensureUserExists, login } from './helpers/login'
import {
createProject,
enableLinkSharing,
+ openProjectByName,
+ openProjectViaLinkSharingAsUser,
shareProjectByEmailAndAcceptInviteViaDash,
} from './helpers/project'
@@ -20,7 +22,12 @@ describe('git-bridge', function () {
V1_HISTORY_URL: 'http://sharelatex:3100/api',
}
- const gitBridgePublicHost = new URL(Cypress.config().baseUrl!).host
+ function gitURL(projectId: string) {
+ const url = new URL(Cypress.config().baseUrl!)
+ url.username = 'git'
+ url.pathname = `/git/${projectId}`
+ return url
+ }
describe('enabled in Server Pro', function () {
if (isExcludedBySharding('PRO_CUSTOM_1')) return
@@ -39,7 +46,7 @@ describe('git-bridge', function () {
function maybeClearAllTokens() {
cy.visit('/user/settings')
- cy.findByText('Git Integration')
+ cy.findByText('Git integration')
cy.get('button')
.contains(/Generate token|Add another token/)
.then(btn => {
@@ -56,7 +63,7 @@ describe('git-bridge', function () {
it('should render the git-bridge UI in the settings', () => {
maybeClearAllTokens()
cy.visit('/user/settings')
- cy.findByText('Git Integration')
+ cy.findByText('Git integration')
cy.get('button').contains('Generate token').click()
cy.get('code')
.contains(/olp_[a-zA-Z0-9]{16}/)
@@ -77,19 +84,16 @@ describe('git-bridge', function () {
it('should render the git-bridge UI in the editor', function () {
maybeClearAllTokens()
- cy.visit('/project')
createProject('git').as('projectId')
cy.get('header').findByText('Menu').click()
cy.findByText('Sync')
cy.findByText('Git').click()
- cy.findByRole('dialog').within(() => {
+ cy.findByTestId('git-bridge-modal').within(() => {
cy.get('@projectId').then(id => {
- cy.get('code').contains(
- `git clone http://git@${gitBridgePublicHost}/git/${id}`
- )
+ cy.get('code').contains(`git clone ${gitURL(id.toString())}`)
})
cy.findByRole('button', {
- name: 'Generate token',
+ name: /generate token/i,
}).click()
cy.get('code').contains(/olp_[a-zA-Z0-9]{16}/)
})
@@ -98,14 +102,12 @@ describe('git-bridge', function () {
cy.url().then(url => cy.visit(url))
cy.get('header').findByText('Menu').click()
cy.findByText('Git').click()
- cy.findByRole('dialog').within(() => {
+ cy.findByTestId('git-bridge-modal').within(() => {
cy.get('@projectId').then(id => {
- cy.get('code').contains(
- `git clone http://git@${gitBridgePublicHost}/git/${id}`
- )
+ cy.get('code').contains(`git clone ${gitURL(id.toString())}`)
})
cy.findByText('Generate token').should('not.exist')
- cy.findByText(/generate a new one in Account Settings/)
+ cy.findByText(/generate a new one in Account settings/)
cy.findByText('Go to settings')
.should('have.attr', 'target', '_blank')
.and('have.attr', 'href', '/user/settings')
@@ -120,15 +122,13 @@ describe('git-bridge', function () {
let projectName: string
beforeEach(() => {
- cy.visit('/project')
projectName = uuid()
- createProject(projectName).as('projectId')
+ createProject(projectName, { open: false }).as('projectId')
})
it('should expose r/w interface to owner', () => {
maybeClearAllTokens()
- cy.visit('/project')
- cy.findByText(projectName).click()
+ openProjectByName(projectName)
checkGitAccess('readAndWrite')
})
@@ -136,11 +136,10 @@ describe('git-bridge', function () {
shareProjectByEmailAndAcceptInviteViaDash(
projectName,
'collaborator-rw@example.com',
- 'Can edit'
+ 'Editor'
)
maybeClearAllTokens()
- cy.visit('/project')
- cy.findByText(projectName).click()
+ openProjectByName(projectName)
checkGitAccess('readAndWrite')
})
@@ -148,32 +147,39 @@ describe('git-bridge', function () {
shareProjectByEmailAndAcceptInviteViaDash(
projectName,
'collaborator-ro@example.com',
- 'Read only'
+ 'Viewer'
)
maybeClearAllTokens()
- cy.visit('/project')
- cy.findByText(projectName).click()
+ openProjectByName(projectName)
checkGitAccess('readOnly')
})
it('should expose r/w interface to link-sharing r/w collaborator', () => {
+ openProjectByName(projectName)
enableLinkSharing().then(({ linkSharingReadAndWrite }) => {
- login('collaborator-link-rw@example.com')
+ const email = 'collaborator-link-rw@example.com'
+ login(email)
maybeClearAllTokens()
- cy.visit(linkSharingReadAndWrite)
- cy.findByText(projectName) // wait for lazy loading
- cy.findByText('Join Project').click()
+ openProjectViaLinkSharingAsUser(
+ linkSharingReadAndWrite,
+ projectName,
+ email
+ )
checkGitAccess('readAndWrite')
})
})
it('should expose r/o interface to link-sharing r/o collaborator', () => {
+ openProjectByName(projectName)
enableLinkSharing().then(({ linkSharingReadOnly }) => {
- login('collaborator-link-ro@example.com')
+ const email = 'collaborator-link-ro@example.com'
+ login(email)
maybeClearAllTokens()
- cy.visit(linkSharingReadOnly)
- cy.findByText(projectName) // wait for lazy loading
- cy.findByText('Join Project').click()
+ openProjectViaLinkSharingAsUser(
+ linkSharingReadOnly,
+ projectName,
+ email
+ )
checkGitAccess('readOnly')
})
})
@@ -186,13 +192,11 @@ describe('git-bridge', function () {
cy.findByText('Sync')
cy.findByText('Git').click()
cy.get('@projectId').then(projectId => {
- cy.findByRole('dialog').within(() => {
- cy.get('code').contains(
- `git clone http://git@${gitBridgePublicHost}/git/${projectId}`
- )
+ cy.findByTestId('git-bridge-modal').within(() => {
+ cy.get('code').contains(`git clone ${gitURL(projectId.toString())}`)
})
cy.findByRole('button', {
- name: 'Generate token',
+ name: /generate token/i,
}).click()
cy.get('code')
.contains(/olp_[a-zA-Z0-9]{16}/)
@@ -202,7 +206,7 @@ describe('git-bridge', function () {
// close Git modal
cy.findAllByText('Close').last().click()
// close editor menu
- cy.get('#left-menu-modal').click()
+ cy.get('.left-menu-modal-backdrop').click()
const fs = new LightningFS('fs')
const dir = `/${projectId}`
@@ -229,9 +233,11 @@ describe('git-bridge', function () {
dir,
fs,
}
+ const url = gitURL(projectId.toString())
+ url.username = '' // basic auth is specified separately.
const httpOptions = {
http,
- url: `http://sharelatex/git/${projectId}`,
+ url: url.toString(),
headers: {
Authorization: `Basic ${Buffer.from(`git:${token}`).toString('base64')}`,
},
@@ -359,11 +365,10 @@ Hello world
it('should not render the git-bridge UI in the settings', () => {
login('user@example.com')
cy.visit('/user/settings')
- cy.findByText('Git Integration').should('not.exist')
+ cy.findByText('Git integration').should('not.exist')
})
it('should not render the git-bridge UI in the editor', function () {
login('user@example.com')
- cy.visit('/project')
createProject('maybe git')
cy.get('header').findByText('Menu').click()
cy.findByText('Word Count') // wait for lazy loading
diff --git a/server-ce/test/graceful-shutdown.spec.ts b/server-ce/test/graceful-shutdown.spec.ts
index 8201b55b76..40dc144be9 100644
--- a/server-ce/test/graceful-shutdown.spec.ts
+++ b/server-ce/test/graceful-shutdown.spec.ts
@@ -31,8 +31,6 @@ describe('GracefulShutdown', function () {
it('should display banner and flush changes out of redis', () => {
bringServerProBackUp()
login(USER)
-
- cy.visit('/project')
createProject(PROJECT_NAME).then(id => {
projectId = id
})
diff --git a/server-ce/test/helpers/compile.ts b/server-ce/test/helpers/compile.ts
index e65b36f332..d41e43221f 100644
--- a/server-ce/test/helpers/compile.ts
+++ b/server-ce/test/helpers/compile.ts
@@ -4,22 +4,45 @@
* This helper takes into account that other UI interactions take time. We can deduce that latency from the fixed delay (3s minus other latency). This can bring down the effective waiting time to 0s.
*/
export function throttledRecompile() {
+ const { queueReset, recompile } = prepareWaitForNextCompileSlot()
+ queueReset()
+ return recompile
+}
+
+export function stopCompile(options: { delay?: number } = {}) {
+ const { delay = 0 } = options
+ cy.wait(delay)
+ cy.log('Stop compile')
+ cy.findByRole('button', { name: 'Toggle compile options menu' }).click()
+ cy.findByRole('menuitem', { name: 'Stop compilation' }).click()
+}
+
+export function prepareWaitForNextCompileSlot() {
let lastCompile = 0
function queueReset() {
cy.then(() => {
lastCompile = Date.now()
})
}
-
- queueReset()
- return () =>
+ function waitForCompileRateLimitCoolOff(triggerCompile: () => void) {
cy.then(() => {
- cy.log('Recompile without hitting rate-limit')
+ cy.log('Wait for recompile rate-limit to cool off')
const msSinceLastCompile = Date.now() - lastCompile
cy.wait(Math.max(0, 1_000 - msSinceLastCompile))
- cy.findByText('Recompile').click()
queueReset()
- cy.log('Wait for recompile to finish')
- cy.findByText('Recompile')
+ triggerCompile()
+ cy.log('Wait for compile to finish')
+ cy.findByText('Recompile').should('be.visible')
})
+ }
+ function recompile() {
+ waitForCompileRateLimitCoolOff(() => {
+ cy.findByText('Recompile').click()
+ })
+ }
+ return {
+ queueReset,
+ waitForCompileRateLimitCoolOff,
+ recompile,
+ }
}
diff --git a/server-ce/test/helpers/config.ts b/server-ce/test/helpers/config.ts
index 030e70ceb5..78e81be1f7 100644
--- a/server-ce/test/helpers/config.ts
+++ b/server-ce/test/helpers/config.ts
@@ -9,6 +9,7 @@ export function isExcludedBySharding(
| 'CE_DEFAULT'
| 'CE_CUSTOM_1'
| 'CE_CUSTOM_2'
+ | 'CE_CUSTOM_3'
| 'PRO_DEFAULT_1'
| 'PRO_DEFAULT_2'
| 'PRO_CUSTOM_1'
diff --git a/server-ce/test/helpers/hostAdminClient.ts b/server-ce/test/helpers/hostAdminClient.ts
index cafeaa2db6..dadfe2b059 100644
--- a/server-ce/test/helpers/hostAdminClient.ts
+++ b/server-ce/test/helpers/hostAdminClient.ts
@@ -85,6 +85,12 @@ export async function getRedisKeys() {
return stdout.split('\n')
}
+export async function purgeFilestoreData() {
+ await fetchJSON(`${hostAdminURL}/data/user_files`, {
+ method: 'DELETE',
+ })
+}
+
async function sleep(ms: number) {
return new Promise(resolve => {
setTimeout(resolve, ms)
diff --git a/server-ce/test/helpers/login.ts b/server-ce/test/helpers/login.ts
index 1883e6da09..fa95abec1d 100644
--- a/server-ce/test/helpers/login.ts
+++ b/server-ce/test/helpers/login.ts
@@ -68,7 +68,8 @@ export function login(username: string, password = DEFAULT_PASSWORD) {
{
cacheAcrossSpecs: true,
async validate() {
- cy.request({ url: '/project', followRedirect: false }).then(
+ // Hit a cheap endpoint that is behind AuthenticationController.requireLogin().
+ cy.request({ url: '/user/personal_info', followRedirect: false }).then(
response => {
expect(response.status).to.equal(200)
}
diff --git a/server-ce/test/helpers/project.ts b/server-ce/test/helpers/project.ts
index c4d885a57f..4b3197afed 100644
--- a/server-ce/test/helpers/project.ts
+++ b/server-ce/test/helpers/project.ts
@@ -1,67 +1,141 @@
import { login } from './login'
import { openEmail } from './email'
+import { v4 as uuid } from 'uuid'
export function createProject(
name: string,
{
- type = 'Blank Project',
+ type = 'Blank project',
newProjectButtonMatcher = /new project/i,
+ open = true,
}: {
- type?: 'Blank Project' | 'Example Project'
+ type?: 'Blank project' | 'Example project'
newProjectButtonMatcher?: RegExp
+ open?: boolean
} = {}
): Cypress.Chainable {
+ cy.url().then(url => {
+ if (!url.endsWith('/project')) {
+ cy.visit('/project')
+ }
+ })
+ const interceptId = uuid()
+ let projectId = ''
+ if (!open) {
+ cy.then(() => {
+ // Register intercept just before creating the project, otherwise we might
+ // intercept a request from a prior createProject invocation.
+ cy.intercept(
+ { method: 'GET', url: /\/project\/[a-fA-F0-9]{24}$/, times: 1 },
+ req => {
+ projectId = req.url.split('/').pop()!
+ // Redirect back to the project dashboard, effectively reload the page.
+ req.redirect('/project')
+ }
+ ).as(interceptId)
+ })
+ }
cy.findAllByRole('button').contains(newProjectButtonMatcher).click()
// FIXME: This should only look in the left menu
- cy.findAllByText(type).first().click()
+ // The upgrading tests create projects in older versions of Server Pro which used different casing of the project type. Use case-insensitive match.
+ cy.findAllByText(type, { exact: false }).first().click()
cy.findByRole('dialog').within(() => {
cy.get('input').type(name)
cy.findByText('Create').click()
})
- return cy
- .url()
- .should('match', /\/project\/[a-fA-F0-9]{24}/)
- .then(url => url.split('/').pop())
+ if (open) {
+ cy.url().should('match', /\/project\/[a-fA-F0-9]{24}/)
+ waitForMainDocToLoad()
+ return cy
+ .url()
+ .should('match', /\/project\/[a-fA-F0-9]{24}/)
+ .then(url => url.split('/').pop())
+ } else {
+ const alias = `@${interceptId}` // IDEs do not like computed values in cy.wait().
+ cy.wait(alias)
+ return cy.then(() => projectId)
+ }
+}
+
+export function openProjectByName(projectName: string) {
+ cy.visit('/project')
+ cy.findByText(projectName).click()
+ waitForMainDocToLoad()
+}
+
+export function openProjectById(projectId: string) {
+ cy.visit(`/project/${projectId}`)
+ waitForMainDocToLoad()
+}
+
+export function openProjectViaLinkSharingAsAnon(url: string) {
+ cy.visit(url)
+ waitForMainDocToLoad()
+}
+
+export function openProjectViaLinkSharingAsUser(
+ url: string,
+ projectName: string,
+ email: string
+) {
+ cy.visit(url)
+ cy.findByText(projectName) // wait for lazy loading
+ cy.contains(`as ${email}`)
+ cy.findByText('OK, join project').click()
+ waitForMainDocToLoad()
+}
+
+export function openProjectViaInviteNotification(projectName: string) {
+ cy.visit('/project')
+ cy.findByText(projectName)
+ .parent()
+ .parent()
+ .within(() => {
+ cy.findByText('Join Project').click()
+ })
+ cy.findByText('Open Project').click()
+ cy.url().should('match', /\/project\/[a-fA-F0-9]{24}/)
+ waitForMainDocToLoad()
}
function shareProjectByEmail(
projectName: string,
email: string,
- level: 'Read only' | 'Can edit'
+ level: 'Viewer' | 'Editor'
) {
- cy.visit('/project')
- cy.findByText(projectName).click()
+ openProjectByName(projectName)
cy.findByText('Share').click()
cy.findByRole('dialog').within(() => {
- cy.get('input').type(`${email},`)
- cy.get('input')
+ cy.findByLabelText('Add people', { selector: 'input' }).type(`${email},`)
+ cy.findByLabelText('Add people', { selector: 'input' })
.parents('form')
- .within(() => cy.findByText('Can edit').parent().select(level))
- cy.findByText('Share').click({ force: true })
+ .within(() => {
+ cy.findByTestId('add-collaborator-select')
+ .click()
+ .then(() => {
+ cy.findByText(level).click()
+ })
+ })
+ cy.findByText('Invite').click({ force: true })
+ cy.findByText('Invite not yet accepted.')
})
}
export function shareProjectByEmailAndAcceptInviteViaDash(
projectName: string,
email: string,
- level: 'Read only' | 'Can edit'
+ level: 'Viewer' | 'Editor'
) {
shareProjectByEmail(projectName, email, level)
login(email)
- cy.visit('/project')
- cy.findByText(new RegExp(projectName))
- .parent()
- .parent()
- .within(() => {
- cy.findByText('Join Project').click()
- })
+ openProjectViaInviteNotification(projectName)
}
export function shareProjectByEmailAndAcceptInviteViaEmail(
projectName: string,
email: string,
- level: 'Read only' | 'Can edit'
+ level: 'Viewer' | 'Editor'
) {
shareProjectByEmail(projectName, email, level)
@@ -79,23 +153,27 @@ export function shareProjectByEmailAndAcceptInviteViaEmail(
cy.findByText(/user would like you to join/)
cy.contains(new RegExp(`You are accepting this invite as ${email}`))
cy.findByText('Join Project').click()
+ waitForMainDocToLoad()
}
export function enableLinkSharing() {
let linkSharingReadOnly: string
let linkSharingReadAndWrite: string
+ const origin = new URL(Cypress.config().baseUrl!).origin
+
+ waitForMainDocToLoad()
cy.findByText('Share').click()
cy.findByText('Turn on link sharing').click()
cy.findByText('Anyone with this link can view this project')
.next()
- .should('contain.text', 'http://sharelatex/')
+ .should('contain.text', origin + '/read')
.then(el => {
linkSharingReadOnly = el.text()
})
cy.findByText('Anyone with this link can edit this project')
.next()
- .should('contain.text', 'http://sharelatex/')
+ .should('contain.text', origin + '/')
.then(el => {
linkSharingReadAndWrite = el.text()
})
@@ -104,3 +182,77 @@ export function enableLinkSharing() {
return { linkSharingReadOnly, linkSharingReadAndWrite }
})
}
+
+export function waitForMainDocToLoad() {
+ cy.log('Wait for main doc to load; it will steal the focus after loading')
+ cy.get('.cm-content').should('contain.text', 'Introduction')
+}
+
+export function openFile(fileName: string, waitFor: string) {
+ // force: The file-tree pane is too narrow to display the full name.
+ cy.findByTestId('file-tree').findByText(fileName).click({ force: true })
+
+ // wait until we've switched to the selected file
+ cy.findByText('Loading…').should('not.exist')
+ cy.findByText(waitFor)
+}
+
+export function createNewFile() {
+ const fileName = `${uuid()}.tex`
+
+ cy.log('create new project file')
+ cy.get('button').contains('New file').click({ force: true })
+ cy.findByRole('dialog').within(() => {
+ cy.get('input').clear()
+ cy.get('input').type(fileName)
+ cy.findByText('Create').click()
+ })
+ // force: The file-tree pane is too narrow to display the full name.
+ cy.findByTestId('file-tree').findByText(fileName).click({ force: true })
+
+ // wait until we've switched to the newly created empty file
+ cy.findByText('Loading…').should('not.exist')
+ cy.get('.cm-line').should('have.length', 1)
+
+ return fileName
+}
+
+export function prepareFileUploadTest(binary = false) {
+ const name = `${uuid()}.txt`
+ const content = `Test File Content ${name}${binary ? ' \x00' : ''}`
+ cy.get('button').contains('Upload').click({ force: true })
+ cy.get('input[type=file]')
+ .first()
+ .selectFile(
+ {
+ contents: Cypress.Buffer.from(content),
+ fileName: name,
+ lastModified: Date.now(),
+ },
+ { force: true }
+ )
+
+ // wait for the upload to finish
+ cy.findByRole('treeitem', { name })
+
+ return function check() {
+ cy.findByRole('treeitem', { name }).click()
+ if (binary) {
+ cy.findByText(content).should('not.have.class', 'cm-line')
+ } else {
+ cy.findByText(content).should('have.class', 'cm-line')
+ }
+ }
+}
+
+export function testNewFileUpload() {
+ it('can upload text file', () => {
+ const check = prepareFileUploadTest(false)
+ check()
+ })
+
+ it('can upload binary file', () => {
+ const check = prepareFileUploadTest(true)
+ check()
+ })
+}
diff --git a/server-ce/test/history.spec.ts b/server-ce/test/history.spec.ts
index cc1950f240..f0d7e74fb3 100644
--- a/server-ce/test/history.spec.ts
+++ b/server-ce/test/history.spec.ts
@@ -40,7 +40,6 @@ describe('History', function () {
const CLASS_DELETION = 'ol-cm-deletion-marker'
it('should support labels, comparison and download', () => {
- cy.visit('/project')
createProject('labels')
const recompile = throttledRecompile()
diff --git a/server-ce/test/host-admin.js b/server-ce/test/host-admin.js
index 9e4cd5d360..b3dcd72b1f 100644
--- a/server-ce/test/host-admin.js
+++ b/server-ce/test/host-admin.js
@@ -29,6 +29,17 @@ const IMAGES = {
PRO: process.env.IMAGE_TAG_PRO.replace(/:.+/, ''),
}
+function defaultDockerComposeOverride() {
+ return {
+ services: {
+ sharelatex: {
+ environment: {},
+ },
+ 'git-bridge': {},
+ },
+ }
+}
+
let previousConfig = ''
function readDockerComposeOverride() {
@@ -38,14 +49,7 @@ function readDockerComposeOverride() {
if (error.code !== 'ENOENT') {
throw error
}
- return {
- services: {
- sharelatex: {
- environment: {},
- },
- 'git-bridge': {},
- },
- }
+ return defaultDockerComposeOverride()
}
}
@@ -77,12 +81,21 @@ app.use(bodyParser.json())
app.use((req, res, next) => {
// Basic access logs
console.log(req.method, req.url, req.body)
+ const json = res.json
+ res.json = body => {
+ console.log(req.method, req.url, req.body, '->', body)
+ json.call(res, body)
+ }
+ next()
+})
+app.use((req, res, next) => {
// Add CORS headers
const accessControlAllowOrigin =
process.env.ACCESS_CONTROL_ALLOW_ORIGIN || 'http://sharelatex'
res.setHeader('Access-Control-Allow-Origin', accessControlAllowOrigin)
res.setHeader('Access-Control-Allow-Headers', 'Content-Type')
res.setHeader('Access-Control-Max-Age', '3600')
+ res.setHeader('Access-Control-Allow-Methods', 'DELETE, GET, HEAD, POST, PUT')
next()
})
@@ -131,10 +144,9 @@ const allowedVars = Joi.object(
'GIT_BRIDGE_HOST',
'GIT_BRIDGE_PORT',
'V1_HISTORY_URL',
- 'DOCKER_RUNNER',
'SANDBOXED_COMPILES',
- 'SANDBOXED_COMPILES_SIBLING_CONTAINERS',
'ALL_TEX_LIVE_DOCKER_IMAGE_NAMES',
+ 'OVERLEAF_FILESTORE_MIGRATION_LEVEL',
'OVERLEAF_TEMPLATES_USER_ID',
'OVERLEAF_NEW_PROJECT_TEMPLATE_LINKS',
'OVERLEAF_ALLOW_PUBLIC_ACCESS',
@@ -196,10 +208,7 @@ function setVarsDockerCompose({ pro, vars, version, withDataDir }) {
)
}
- if (
- cfg.services.sharelatex.environment
- .SANDBOXED_COMPILES_SIBLING_CONTAINERS === 'true'
- ) {
+ if (cfg.services.sharelatex.environment.SANDBOXED_COMPILES === 'true') {
cfg.services.sharelatex.environment.SANDBOXED_COMPILES_HOST_DIR =
PATHS.SANDBOXED_COMPILES_HOST_DIR
cfg.services.sharelatex.environment.TEX_LIVE_DOCKER_IMAGE =
@@ -324,8 +333,19 @@ app.get('/redis/keys', (req, res) => {
)
})
+app.delete('/data/user_files', (req, res) => {
+ runDockerCompose(
+ 'exec',
+ ['sharelatex', 'rm', '-rf', '/var/lib/overleaf/data/user_files'],
+ (error, stdout, stderr) => {
+ res.json({ error, stdout, stderr })
+ }
+ )
+})
+
app.use(handleValidationErrors())
purgeDataDir()
+writeDockerComposeOverride(defaultDockerComposeOverride())
app.listen(80)
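
A possible way for a spec to drive the new `DELETE /data/user_files` cleanup endpoint; the `purgeUserFiles` helper and the `http://host-admin` base URL are assumptions, since only the `dockerCompose` and `runScript` imports from `helpers/hostAdminClient` are visible in this diff:

```ts
// Sketch: wiping uploaded user files between filestore-migration test runs.
// Assumes the host-admin server is reachable at this URL from the test runner.
async function purgeUserFiles(hostAdminUrl = 'http://host-admin') {
  const response = await fetch(`${hostAdminUrl}/data/user_files`, {
    method: 'DELETE', // now permitted by the Access-Control-Allow-Methods header above
  })
  if (!response.ok) {
    throw new Error(`failed to purge user files: ${response.status}`)
  }
  // host-admin echoes the docker compose exec result, which helps debugging
  return (await response.json()) as { error: unknown; stdout: string; stderr: string }
}

// usage inside a spec:
// before(async () => { await purgeUserFiles() })
```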
diff --git a/server-ce/test/package-lock.json b/server-ce/test/package-lock.json
index 6e84524ae0..05870284e8 100644
--- a/server-ce/test/package-lock.json
+++ b/server-ce/test/package-lock.json
@@ -12,10 +12,10 @@
"@types/pdf-parse": "^1.1.4",
"@types/uuid": "^9.0.8",
"adm-zip": "^0.5.12",
- "body-parser": "^1.20.2",
+ "body-parser": "^1.20.3",
"celebrate": "^15.0.3",
"cypress": "13.13.2",
- "express": "^4.19.2",
+ "express": "^4.21.2",
"isomorphic-git": "^1.25.10",
"js-yaml": "^4.1.0",
"pdf-parse": "^1.1.1",
@@ -609,9 +609,9 @@
"integrity": "sha512-XpNj6GDQzdfW+r2Wnn7xiSAd7TM3jzkxGXBGTtWKuSXv1xUV+azxAm8jdWZN06QTQk+2N2XB9jRDkvbmQmcRtg=="
},
"node_modules/body-parser": {
- "version": "1.20.2",
- "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.2.tgz",
- "integrity": "sha512-ml9pReCu3M61kGlqoTm2umSXTlRTuGTx0bfYj+uIUKKYycG5NtSbeetV3faSU6R7ajOPw0g/J1PvK4qNy7s5bA==",
+ "version": "1.20.3",
+ "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.3.tgz",
+ "integrity": "sha512-7rAxByjUMqQ3/bHJy7D6OGXvx/MMc4IqBn/X0fcM1QUcAItpZrBEYhWGem+tzXH90c+G01ypMcYJBO9Y30203g==",
"dependencies": {
"bytes": "3.1.2",
"content-type": "~1.0.5",
@@ -621,7 +621,7 @@
"http-errors": "2.0.0",
"iconv-lite": "0.4.24",
"on-finished": "2.4.1",
- "qs": "6.11.0",
+ "qs": "6.13.0",
"raw-body": "2.5.2",
"type-is": "~1.6.18",
"unpipe": "1.0.0"
@@ -645,11 +645,11 @@
"integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="
},
"node_modules/body-parser/node_modules/qs": {
- "version": "6.11.0",
- "resolved": "https://registry.npmjs.org/qs/-/qs-6.11.0.tgz",
- "integrity": "sha512-MvjoMCJwEarSbUYk5O+nmoSzSutSsTwF85zcHPQ9OrlFoZOYIjaqBAJIqIXjptyD5vThxGq52Xu/MaJzRkIk4Q==",
+ "version": "6.13.0",
+ "resolved": "https://registry.npmjs.org/qs/-/qs-6.13.0.tgz",
+ "integrity": "sha512-+38qI9SOr8tfZ4QmJNplMUxqjbe7LKvvZgWdExBOmd+egZTtjLB67Gu0HRX3u/XOq7UU2Nx6nsjvS16Z9uwfpg==",
"dependencies": {
- "side-channel": "^1.0.4"
+ "side-channel": "^1.0.6"
},
"engines": {
"node": ">=0.6"
@@ -718,6 +718,33 @@
"url": "https://github.com/sponsors/ljharb"
}
},
+ "node_modules/call-bind-apply-helpers": {
+ "version": "1.0.2",
+ "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz",
+ "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==",
+ "dependencies": {
+ "es-errors": "^1.3.0",
+ "function-bind": "^1.1.2"
+ },
+ "engines": {
+ "node": ">= 0.4"
+ }
+ },
+ "node_modules/call-bound": {
+ "version": "1.0.3",
+ "resolved": "https://registry.npmjs.org/call-bound/-/call-bound-1.0.3.tgz",
+ "integrity": "sha512-YTd+6wGlNlPxSuri7Y6X8tY2dmm12UMH66RpKMhiX6rsk5wXXnYgbUcOt8kiS31/AjfoTOvCsE+w8nZQLQnzHA==",
+ "dependencies": {
+ "call-bind-apply-helpers": "^1.0.1",
+ "get-intrinsic": "^1.2.6"
+ },
+ "engines": {
+ "node": ">= 0.4"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/ljharb"
+ }
+ },
"node_modules/caseless": {
"version": "0.12.0",
"resolved": "https://registry.npmjs.org/caseless/-/caseless-0.12.0.tgz",
@@ -891,9 +918,9 @@
}
},
"node_modules/cookie": {
- "version": "0.6.0",
- "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.6.0.tgz",
- "integrity": "sha512-U71cyTamuh1CRNCfpGY6to28lxvNwPG4Guz/EVjgf3Jmzv0vlDp1atT9eS5dDjMYHucpHbWns6Lwf3BKz6svdw==",
+ "version": "0.7.1",
+ "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.1.tgz",
+ "integrity": "sha512-6DnInpx7SJ2AK3+CTUE/ZM0vWTUboZCegxhC2xiIydHR9jNuTAASBrfEpHhiGOZw/nX51bHt6YQl8jsGo4y/0w==",
"engines": {
"node": ">= 0.6"
}
@@ -920,9 +947,9 @@
}
},
"node_modules/cross-spawn": {
- "version": "7.0.3",
- "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
- "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==",
+ "version": "7.0.6",
+ "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
+ "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==",
"dependencies": {
"path-key": "^3.1.0",
"shebang-command": "^2.0.0",
@@ -1140,6 +1167,19 @@
"resolved": "https://registry.npmjs.org/dom-accessibility-api/-/dom-accessibility-api-0.5.16.tgz",
"integrity": "sha512-X7BJ2yElsnOJ30pZF4uIIDfBEVgF4XEBxL9Bxhy6dnrm5hkzqmsWHGTiHqRiITNhMyFLyAiWndIJP7Z1NTteDg=="
},
+ "node_modules/dunder-proto": {
+ "version": "1.0.1",
+ "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
+ "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==",
+ "dependencies": {
+ "call-bind-apply-helpers": "^1.0.1",
+ "es-errors": "^1.3.0",
+ "gopd": "^1.2.0"
+ },
+ "engines": {
+ "node": ">= 0.4"
+ }
+ },
"node_modules/ecc-jsbn": {
"version": "0.1.2",
"resolved": "https://registry.npmjs.org/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz",
@@ -1160,9 +1200,9 @@
"integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="
},
"node_modules/encodeurl": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz",
- "integrity": "sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w==",
+ "version": "2.0.0",
+ "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz",
+ "integrity": "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==",
"engines": {
"node": ">= 0.8"
}
@@ -1187,6 +1227,22 @@
"node": ">=8.6"
}
},
+ "node_modules/es-define-property": {
+ "version": "1.0.1",
+ "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz",
+ "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==",
+ "engines": {
+ "node": ">= 0.4"
+ }
+ },
+ "node_modules/es-errors": {
+ "version": "1.3.0",
+ "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz",
+ "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==",
+ "engines": {
+ "node": ">= 0.4"
+ }
+ },
"node_modules/es-get-iterator": {
"version": "1.1.3",
"resolved": "https://registry.npmjs.org/es-get-iterator/-/es-get-iterator-1.1.3.tgz",
@@ -1206,6 +1262,17 @@
"url": "https://github.com/sponsors/ljharb"
}
},
+ "node_modules/es-object-atoms": {
+ "version": "1.1.1",
+ "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz",
+ "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==",
+ "dependencies": {
+ "es-errors": "^1.3.0"
+ },
+ "engines": {
+ "node": ">= 0.4"
+ }
+ },
"node_modules/escape-html": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz",
@@ -1266,36 +1333,36 @@
}
},
"node_modules/express": {
- "version": "4.19.2",
- "resolved": "https://registry.npmjs.org/express/-/express-4.19.2.tgz",
- "integrity": "sha512-5T6nhjsT+EOMzuck8JjBHARTHfMht0POzlA60WV2pMD3gyXw2LZnZ+ueGdNxG+0calOJcWKbpFcuzLZ91YWq9Q==",
+ "version": "4.21.2",
+ "resolved": "https://registry.npmjs.org/express/-/express-4.21.2.tgz",
+ "integrity": "sha512-28HqgMZAmih1Czt9ny7qr6ek2qddF4FclbMzwhCREB6OFfH+rXAnuNCwo1/wFvrtbgsQDb4kSbX9de9lFbrXnA==",
"dependencies": {
"accepts": "~1.3.8",
"array-flatten": "1.1.1",
- "body-parser": "1.20.2",
+ "body-parser": "1.20.3",
"content-disposition": "0.5.4",
"content-type": "~1.0.4",
- "cookie": "0.6.0",
+ "cookie": "0.7.1",
"cookie-signature": "1.0.6",
"debug": "2.6.9",
"depd": "2.0.0",
- "encodeurl": "~1.0.2",
+ "encodeurl": "~2.0.0",
"escape-html": "~1.0.3",
"etag": "~1.8.1",
- "finalhandler": "1.2.0",
+ "finalhandler": "1.3.1",
"fresh": "0.5.2",
"http-errors": "2.0.0",
- "merge-descriptors": "1.0.1",
+ "merge-descriptors": "1.0.3",
"methods": "~1.1.2",
"on-finished": "2.4.1",
"parseurl": "~1.3.3",
- "path-to-regexp": "0.1.7",
+ "path-to-regexp": "0.1.12",
"proxy-addr": "~2.0.7",
- "qs": "6.11.0",
+ "qs": "6.13.0",
"range-parser": "~1.2.1",
"safe-buffer": "5.2.1",
- "send": "0.18.0",
- "serve-static": "1.15.0",
+ "send": "0.19.0",
+ "serve-static": "1.16.2",
"setprototypeof": "1.2.0",
"statuses": "2.0.1",
"type-is": "~1.6.18",
@@ -1304,6 +1371,10 @@
},
"engines": {
"node": ">= 0.10.0"
+ },
+ "funding": {
+ "type": "opencollective",
+ "url": "https://opencollective.com/express"
}
},
"node_modules/express/node_modules/debug": {
@@ -1320,11 +1391,11 @@
"integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="
},
"node_modules/express/node_modules/qs": {
- "version": "6.11.0",
- "resolved": "https://registry.npmjs.org/qs/-/qs-6.11.0.tgz",
- "integrity": "sha512-MvjoMCJwEarSbUYk5O+nmoSzSutSsTwF85zcHPQ9OrlFoZOYIjaqBAJIqIXjptyD5vThxGq52Xu/MaJzRkIk4Q==",
+ "version": "6.13.0",
+ "resolved": "https://registry.npmjs.org/qs/-/qs-6.13.0.tgz",
+ "integrity": "sha512-+38qI9SOr8tfZ4QmJNplMUxqjbe7LKvvZgWdExBOmd+egZTtjLB67Gu0HRX3u/XOq7UU2Nx6nsjvS16Z9uwfpg==",
"dependencies": {
- "side-channel": "^1.0.4"
+ "side-channel": "^1.0.6"
},
"engines": {
"node": ">=0.6"
@@ -1393,12 +1464,12 @@
}
},
"node_modules/finalhandler": {
- "version": "1.2.0",
- "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.2.0.tgz",
- "integrity": "sha512-5uXcUVftlQMFnWC9qu/svkWv3GTd2PfUhK/3PLkYNAe7FbqJMt3515HaxE6eRL74GdsriiwujiawdaB1BpEISg==",
+ "version": "1.3.1",
+ "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.3.1.tgz",
+ "integrity": "sha512-6BN9trH7bp3qvnrRyzsBz+g3lZxTNZTbVO2EV1CS0WIcDbawYVdYvGflME/9QP0h0pYlCDBCTjYa9nZzMDpyxQ==",
"dependencies": {
"debug": "2.6.9",
- "encodeurl": "~1.0.2",
+ "encodeurl": "~2.0.0",
"escape-html": "~1.0.3",
"on-finished": "2.4.1",
"parseurl": "~1.3.3",
@@ -1498,19 +1569,40 @@
}
},
"node_modules/get-intrinsic": {
- "version": "1.2.2",
- "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.2.tgz",
- "integrity": "sha512-0gSo4ml/0j98Y3lngkFEot/zhiCeWsbYIlZ+uZOVgzLyLaUw7wxUL+nCTP0XJvJg1AXulJRI3UJi8GsbDuxdGA==",
+ "version": "1.3.0",
+ "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz",
+ "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==",
"dependencies": {
+ "call-bind-apply-helpers": "^1.0.2",
+ "es-define-property": "^1.0.1",
+ "es-errors": "^1.3.0",
+ "es-object-atoms": "^1.1.1",
"function-bind": "^1.1.2",
- "has-proto": "^1.0.1",
- "has-symbols": "^1.0.3",
- "hasown": "^2.0.0"
+ "get-proto": "^1.0.1",
+ "gopd": "^1.2.0",
+ "has-symbols": "^1.1.0",
+ "hasown": "^2.0.2",
+ "math-intrinsics": "^1.1.0"
+ },
+ "engines": {
+ "node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
+ "node_modules/get-proto": {
+ "version": "1.0.1",
+ "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz",
+ "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==",
+ "dependencies": {
+ "dunder-proto": "^1.0.1",
+ "es-object-atoms": "^1.0.0"
+ },
+ "engines": {
+ "node": ">= 0.4"
+ }
+ },
"node_modules/get-stream": {
"version": "5.2.0",
"resolved": "https://registry.npmjs.org/get-stream/-/get-stream-5.2.0.tgz",
@@ -1556,11 +1648,11 @@
}
},
"node_modules/gopd": {
- "version": "1.0.1",
- "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.0.1.tgz",
- "integrity": "sha512-d65bNlIadxvpb/A2abVdlqKqV563juRnZ1Wtk6s1sIR8uNsXR70xqIzVqxVf1eTqDunwT2MkczEeaezCKTZhwA==",
- "dependencies": {
- "get-intrinsic": "^1.1.3"
+ "version": "1.2.0",
+ "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz",
+ "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==",
+ "engines": {
+ "node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
@@ -1598,21 +1690,10 @@
"url": "https://github.com/sponsors/ljharb"
}
},
- "node_modules/has-proto": {
- "version": "1.0.1",
- "resolved": "https://registry.npmjs.org/has-proto/-/has-proto-1.0.1.tgz",
- "integrity": "sha512-7qE+iP+O+bgF9clE5+UoBFzE65mlBiVj3tKCrlNQ0Ogwm0BjpT/gK4SlLYDMybDh5I3TCTKnPPa0oMG7JDYrhg==",
- "engines": {
- "node": ">= 0.4"
- },
- "funding": {
- "url": "https://github.com/sponsors/ljharb"
- }
- },
"node_modules/has-symbols": {
- "version": "1.0.3",
- "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.0.3.tgz",
- "integrity": "sha512-l3LCuF6MgDNwTDKkdYGEihYjt5pRPbEg46rtlmnSPlUbgmB8LOIrKJbYYFBSbnPaJexMKtiPO8hmeRjRz2Td+A==",
+ "version": "1.1.0",
+ "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz",
+ "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==",
"engines": {
"node": ">= 0.4"
},
@@ -1635,9 +1716,9 @@
}
},
"node_modules/hasown": {
- "version": "2.0.0",
- "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.0.tgz",
- "integrity": "sha512-vUptKVTpIJhcczKBbgnS+RtcuYMB8+oNzPK2/Hp3hanz8JmpATdmmgLgSaadVREkDm+e2giHwY3ZRkyjSIDDFA==",
+ "version": "2.0.2",
+ "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz",
+ "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==",
"dependencies": {
"function-bind": "^1.1.2"
},
@@ -2284,6 +2365,14 @@
"lz-string": "bin/bin.js"
}
},
+ "node_modules/math-intrinsics": {
+ "version": "1.1.0",
+ "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz",
+ "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==",
+ "engines": {
+ "node": ">= 0.4"
+ }
+ },
"node_modules/media-typer": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz",
@@ -2293,9 +2382,12 @@
}
},
"node_modules/merge-descriptors": {
- "version": "1.0.1",
- "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz",
- "integrity": "sha512-cCi6g3/Zr1iqQi6ySbseM1Xvooa98N0w31jzUYrXPX2xqObmFGHJ0tQ5u74H3mVh7wLouTseZyYIq39g8cNp1w=="
+ "version": "1.0.3",
+ "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.3.tgz",
+ "integrity": "sha512-gaNvAS7TZ897/rVaZ0nMtAyxNyi/pdbjbAwUpFQpN70GqnVfOiXpeUUMKRBmzXaSQ8DdTX4/0ms62r2K+hE6mQ==",
+ "funding": {
+ "url": "https://github.com/sponsors/sindresorhus"
+ }
},
"node_modules/merge-stream": {
"version": "2.0.0",
@@ -2405,9 +2497,12 @@
}
},
"node_modules/object-inspect": {
- "version": "1.13.1",
- "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.1.tgz",
- "integrity": "sha512-5qoj1RUiKOMsCCNLV1CBiPYE10sziTsnmNxkAI/rZhiD63CF7IqdFGC/XzjWjpSgLf0LxXX3bDFIh0E18f6UhQ==",
+ "version": "1.13.4",
+ "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz",
+ "integrity": "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==",
+ "engines": {
+ "node": ">= 0.4"
+ },
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
@@ -2526,9 +2621,9 @@
}
},
"node_modules/path-to-regexp": {
- "version": "0.1.7",
- "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.7.tgz",
- "integrity": "sha512-5DFkuoqlv1uYQKxy8omFBeJPQcdoE07Kv2sferDCrAq1ohOU+MSDswDIbnx3YAM60qIOnYa53wBhXW0EbMonrQ=="
+ "version": "0.1.12",
+ "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.12.tgz",
+ "integrity": "sha512-RA1GjUVMnvYFxuqovrEqZoxxW5NUZqbwKtYz/Tt7nXerk0LbLblQmrsgdeOxV5SFHf0UDggjS/bSeOZwt1pmEQ=="
},
"node_modules/pdf-parse": {
"version": "1.1.1",
@@ -2807,9 +2902,9 @@
}
},
"node_modules/send": {
- "version": "0.18.0",
- "resolved": "https://registry.npmjs.org/send/-/send-0.18.0.tgz",
- "integrity": "sha512-qqWzuOjSFOuqPjFe4NOsMLafToQQwBSOEpS+FwEt3A2V3vKubTquT3vmLTQpFgMXp8AlFWFuP1qKaJZOtPpVXg==",
+ "version": "0.19.0",
+ "resolved": "https://registry.npmjs.org/send/-/send-0.19.0.tgz",
+ "integrity": "sha512-dW41u5VfLXu8SJh5bwRmyYUbAoSB3c9uQh6L8h/KtsFREPWpbX1lrljJo186Jc4nmci/sGUZ9a0a0J2zgfq2hw==",
"dependencies": {
"debug": "2.6.9",
"depd": "2.0.0",
@@ -2842,20 +2937,28 @@
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
"integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="
},
+ "node_modules/send/node_modules/encodeurl": {
+ "version": "1.0.2",
+ "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz",
+ "integrity": "sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w==",
+ "engines": {
+ "node": ">= 0.8"
+ }
+ },
"node_modules/send/node_modules/ms": {
"version": "2.1.3",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
"integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="
},
"node_modules/serve-static": {
- "version": "1.15.0",
- "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.15.0.tgz",
- "integrity": "sha512-XGuRDNjXUijsUL0vl6nSD7cwURuzEgglbOaFuZM9g3kwDXOWVTck0jLzjPzGD+TazWbboZYu52/9/XPdUgne9g==",
+ "version": "1.16.2",
+ "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.16.2.tgz",
+ "integrity": "sha512-VqpjJZKadQB/PEbEwvFdO43Ax5dFBZ2UECszz8bQ7pi7wt//PWe1P6MN7eCnjsatYtBT6EuiClbjSWP2WrIoTw==",
"dependencies": {
- "encodeurl": "~1.0.2",
+ "encodeurl": "~2.0.0",
"escape-html": "~1.0.3",
"parseurl": "~1.3.3",
- "send": "0.18.0"
+ "send": "0.19.0"
},
"engines": {
"node": ">= 0.8.0"
@@ -2925,13 +3028,68 @@
}
},
"node_modules/side-channel": {
- "version": "1.0.4",
- "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.0.4.tgz",
- "integrity": "sha512-q5XPytqFEIKHkGdiMIrY10mvLRvnQh42/+GoBlFW3b2LXLE2xxJpZFdm94we0BaoV3RwJyGqg5wS7epxTv0Zvw==",
+ "version": "1.1.0",
+ "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.0.tgz",
+ "integrity": "sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==",
"dependencies": {
- "call-bind": "^1.0.0",
- "get-intrinsic": "^1.0.2",
- "object-inspect": "^1.9.0"
+ "es-errors": "^1.3.0",
+ "object-inspect": "^1.13.3",
+ "side-channel-list": "^1.0.0",
+ "side-channel-map": "^1.0.1",
+ "side-channel-weakmap": "^1.0.2"
+ },
+ "engines": {
+ "node": ">= 0.4"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/ljharb"
+ }
+ },
+ "node_modules/side-channel-list": {
+ "version": "1.0.0",
+ "resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.0.tgz",
+ "integrity": "sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==",
+ "dependencies": {
+ "es-errors": "^1.3.0",
+ "object-inspect": "^1.13.3"
+ },
+ "engines": {
+ "node": ">= 0.4"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/ljharb"
+ }
+ },
+ "node_modules/side-channel-map": {
+ "version": "1.0.1",
+ "resolved": "https://registry.npmjs.org/side-channel-map/-/side-channel-map-1.0.1.tgz",
+ "integrity": "sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==",
+ "dependencies": {
+ "call-bound": "^1.0.2",
+ "es-errors": "^1.3.0",
+ "get-intrinsic": "^1.2.5",
+ "object-inspect": "^1.13.3"
+ },
+ "engines": {
+ "node": ">= 0.4"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/ljharb"
+ }
+ },
+ "node_modules/side-channel-weakmap": {
+ "version": "1.0.2",
+ "resolved": "https://registry.npmjs.org/side-channel-weakmap/-/side-channel-weakmap-1.0.2.tgz",
+ "integrity": "sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==",
+ "dependencies": {
+ "call-bound": "^1.0.2",
+ "es-errors": "^1.3.0",
+ "get-intrinsic": "^1.2.5",
+ "object-inspect": "^1.13.3",
+ "side-channel-map": "^1.0.1"
+ },
+ "engines": {
+ "node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
diff --git a/server-ce/test/package.json b/server-ce/test/package.json
index 674154dd39..36ba3df2dd 100644
--- a/server-ce/test/package.json
+++ b/server-ce/test/package.json
@@ -15,10 +15,10 @@
"@types/pdf-parse": "^1.1.4",
"@types/uuid": "^9.0.8",
"adm-zip": "^0.5.12",
- "body-parser": "^1.20.2",
+ "body-parser": "^1.20.3",
"celebrate": "^15.0.3",
"cypress": "13.13.2",
- "express": "^4.19.2",
+ "express": "^4.21.2",
"isomorphic-git": "^1.25.10",
"js-yaml": "^4.1.0",
"pdf-parse": "^1.1.1",
diff --git a/server-ce/test/project-list.spec.ts b/server-ce/test/project-list.spec.ts
index 9ee9ac9ca0..998fcf9ffb 100644
--- a/server-ce/test/project-list.spec.ts
+++ b/server-ce/test/project-list.spec.ts
@@ -18,11 +18,11 @@ describe('Project List', () => {
describe('user with no projects', () => {
ensureUserExists({ email: WITHOUT_PROJECTS_USER })
- it("'Import from Github' is not displayed in the welcome page", () => {
+ it("'Import from GitHub' is not displayed in the welcome page", () => {
login(WITHOUT_PROJECTS_USER)
cy.visit('/project')
cy.findByText('Create a new project').click()
- cy.findByText(/Import from Github/i).should('not.exist')
+ cy.findByText(/Import from GitHub/i).should('not.exist')
})
})
@@ -31,29 +31,27 @@ describe('Project List', () => {
ensureUserExists({ email: REGULAR_USER })
before(() => {
+ login(REGULAR_USER)
+ createProject(projectName, { type: 'Example project', open: false })
+ })
+ beforeEach(function () {
login(REGULAR_USER)
cy.visit('/project')
- createProject(projectName, { type: 'Example Project' })
})
it('Can download project sources', () => {
- login(REGULAR_USER)
- cy.visit('/project')
-
findProjectRow(projectName).within(() =>
cy.findByRole('button', { name: 'Download .zip file' }).click()
)
+ const zipName = projectName.replaceAll('-', '_')
cy.task('readFileInZip', {
- pathToZip: `cypress/downloads/${projectName}.zip`,
+ pathToZip: `cypress/downloads/${zipName}.zip`,
fileToRead: 'main.tex',
}).should('contain', 'Your introduction goes here')
})
it('Can download project PDF', () => {
- login(REGULAR_USER)
- cy.visit('/project')
-
findProjectRow(projectName).within(() =>
cy.findByRole('button', { name: 'Download PDF' }).click()
)
@@ -67,9 +65,6 @@ describe('Project List', () => {
it('can assign and remove tags to projects', () => {
const tagName = uuid().slice(0, 7) // long tag names are truncated in the UI, which affects selectors
- login(REGULAR_USER)
- cy.visit('/project')
-
cy.log('select project')
cy.get(`[aria-label="Select ${projectName}"]`).click()
@@ -90,9 +85,7 @@ describe('Project List', () => {
cy.log('create a separate project to filter')
const nonTaggedProjectName = `project-${uuid()}`
login(REGULAR_USER)
- cy.visit('/project')
- createProject(nonTaggedProjectName)
- cy.visit('/project')
+ createProject(nonTaggedProjectName, { open: false })
cy.log('select project')
cy.get(`[aria-label="Select ${projectName}"]`).click()
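
The calls above lean on new options for `createProject`; the real definition sits in the `helpers/project.ts` hunk that is not fully shown here, so the interface below is only an inferred sketch based on the call sites in this diff:

```ts
// Inferred options for createProject, based on usages such as
// createProject(name, { type: 'Example project', open: false }) and
// createProject(name, { newProjectButtonMatcher }) elsewhere in this diff.
interface CreateProjectOptions {
  type?: string // dashboard menu entry to click, e.g. 'Example project'
  open?: boolean // assumption: defaults to true, opening the editor and waiting for the main doc
  newProjectButtonMatcher?: RegExp // used by the upgrade tests against older UIs
}

declare function createProject(name: string, options?: CreateProjectOptions): void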
diff --git a/server-ce/test/project-sharing.spec.ts b/server-ce/test/project-sharing.spec.ts
index e14f36d778..4da2209332 100644
--- a/server-ce/test/project-sharing.spec.ts
+++ b/server-ce/test/project-sharing.spec.ts
@@ -4,6 +4,9 @@ import { ensureUserExists, login } from './helpers/login'
import {
createProject,
enableLinkSharing,
+ openProjectByName,
+ openProjectViaLinkSharingAsAnon,
+ openProjectViaLinkSharingAsUser,
shareProjectByEmailAndAcceptInviteViaDash,
shareProjectByEmailAndAcceptInviteViaEmail,
} from './helpers/project'
@@ -31,7 +34,6 @@ describe('Project Sharing', function () {
function setupTestProject() {
login('user@example.com')
- cy.visit('/project')
createProject(projectName)
// Add chat message
@@ -53,8 +55,15 @@ describe('Project Sharing', function () {
function expectContentReadOnlyAccess() {
cy.url().should('match', /\/project\/[a-fA-F0-9]{24}/)
- cy.get('.cm-content').should('contain.text', '\\maketitle')
- cy.get('.cm-content').should('have.attr', 'contenteditable', 'false')
+ cy.findByRole('textbox', { name: /Source Editor editing/i }).should(
+ 'contain.text',
+ '\\maketitle'
+ )
+ cy.findByRole('textbox', { name: /Source Editor editing/i }).should(
+ 'have.attr',
+ 'contenteditable',
+ 'false'
+ )
}
function expectContentWriteAccess() {
@@ -62,13 +71,23 @@ describe('Project Sharing', function () {
cy.url().should('match', /\/project\/[a-fA-F0-9]{24}/)
const recompile = throttledRecompile()
// wait for the editor to finish loading
- cy.get('.cm-content').should('contain.text', '\\maketitle')
+ cy.findByRole('textbox', { name: /Source Editor editing/i }).should(
+ 'contain.text',
+ '\\maketitle'
+ )
// the editor should be writable
- cy.get('.cm-content').should('have.attr', 'contenteditable', 'true')
+ cy.findByRole('textbox', { name: /Source Editor editing/i }).should(
+ 'have.attr',
+ 'contenteditable',
+ 'true'
+ )
cy.findByText('\\maketitle').parent().click()
cy.findByText('\\maketitle').parent().type(`\n\\section{{}${section}}`)
// should have written
- cy.get('.cm-content').should('contain.text', `\\section{${section}}`)
+ cy.findByRole('textbox', { name: /Source Editor editing/i }).should(
+ 'contain.text',
+ `\\section{${section}}`
+ )
// check PDF
recompile()
cy.get('.pdf-viewer').should('contain.text', projectName)
@@ -152,16 +171,10 @@ describe('Project Sharing', function () {
beforeEach(function () {
login('user@example.com')
- shareProjectByEmailAndAcceptInviteViaEmail(
- projectName,
- email,
- 'Read only'
- )
+ shareProjectByEmailAndAcceptInviteViaEmail(projectName, email, 'Viewer')
})
it('should grant the collaborator read access', () => {
- cy.visit('/project')
- cy.findByText(projectName).click()
expectFullReadOnlyAccess()
expectProjectDashboardEntry()
})
@@ -173,13 +186,12 @@ describe('Project Sharing', function () {
beforeWithReRunOnTestRetry(function () {
login('user@example.com')
- shareProjectByEmailAndAcceptInviteViaDash(projectName, email, 'Read only')
+ shareProjectByEmailAndAcceptInviteViaDash(projectName, email, 'Viewer')
})
it('should grant the collaborator read access', () => {
login(email)
- cy.visit('/project')
- cy.findByText(projectName).click()
+ openProjectByName(projectName)
expectFullReadOnlyAccess()
expectProjectDashboardEntry()
})
@@ -191,13 +203,12 @@ describe('Project Sharing', function () {
beforeWithReRunOnTestRetry(function () {
login('user@example.com')
- shareProjectByEmailAndAcceptInviteViaDash(projectName, email, 'Can edit')
+ shareProjectByEmailAndAcceptInviteViaDash(projectName, email, 'Editor')
})
it('should grant the collaborator write access', () => {
login(email)
- cy.visit('/project')
- cy.findByText(projectName).click()
+ openProjectByName(projectName)
expectReadAndWriteAccess()
expectEditAuthoredAs('You')
expectProjectDashboardEntry()
@@ -212,9 +223,11 @@ describe('Project Sharing', function () {
it('should grant restricted read access', () => {
login(email)
- cy.visit(linkSharingReadOnly)
- cy.findByText(projectName) // wait for lazy loading
- cy.findByText('Join Project').click()
+ openProjectViaLinkSharingAsUser(
+ linkSharingReadOnly,
+ projectName,
+ email
+ )
expectRestrictedReadOnlyAccess()
expectProjectDashboardEntry()
})
@@ -226,9 +239,11 @@ describe('Project Sharing', function () {
it('should grant full write access', () => {
login(email)
- cy.visit(linkSharingReadAndWrite)
- cy.findByText(projectName) // wait for lazy loading
- cy.findByText('Join Project').click()
+ openProjectViaLinkSharingAsUser(
+ linkSharingReadAndWrite,
+ projectName,
+ email
+ )
expectReadAndWriteAccess()
expectEditAuthoredAs('You')
expectProjectDashboardEntry()
@@ -272,7 +287,7 @@ describe('Project Sharing', function () {
withDataDir: true,
})
it('should grant read access with read link', () => {
- cy.visit(linkSharingReadOnly)
+ openProjectViaLinkSharingAsAnon(linkSharingReadOnly)
expectRestrictedReadOnlyAccess()
})
@@ -292,12 +307,12 @@ describe('Project Sharing', function () {
})
it('should grant read access with read link', () => {
- cy.visit(linkSharingReadOnly)
+ openProjectViaLinkSharingAsAnon(linkSharingReadOnly)
expectRestrictedReadOnlyAccess()
})
it('should grant write access with write link', () => {
- cy.visit(linkSharingReadAndWrite)
+ openProjectViaLinkSharingAsAnon(linkSharingReadAndWrite)
expectReadAndWriteAccess()
expectEditAuthoredAs('Anonymous')
})
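
The new `openProjectByName` / `openProjectViaLinkSharing*` helpers replace the inline `cy.visit` and `Join Project` steps removed above; their bodies live outside this hunk, so the following is a sketch reconstructed from the removed lines:

```ts
// Sketch of the navigation helpers, reconstructed from the steps they replace.
import { waitForMainDocToLoad } from './project'

export function openProjectByName(projectName: string) {
  cy.visit('/project')
  cy.findByText(projectName).click()
  waitForMainDocToLoad()
}

export function openProjectViaLinkSharingAsUser(
  link: string,
  projectName: string,
  email: string
) {
  cy.visit(link)
  cy.findByText(projectName) // wait for lazy loading
  cy.contains(email) // assumption: the join page shows which account is accepting
  cy.findByText('Join Project').click()
  waitForMainDocToLoad()
}

export function openProjectViaLinkSharingAsAnon(link: string) {
  cy.visit(link)
  waitForMainDocToLoad()
}
```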
diff --git a/server-ce/test/sandboxed-compiles.spec.ts b/server-ce/test/sandboxed-compiles.spec.ts
index e50aa36283..71f5b43392 100644
--- a/server-ce/test/sandboxed-compiles.spec.ts
+++ b/server-ce/test/sandboxed-compiles.spec.ts
@@ -1,7 +1,7 @@
import { ensureUserExists, login } from './helpers/login'
import { createProject } from './helpers/project'
import { isExcludedBySharding, startWith } from './helpers/config'
-import { throttledRecompile } from './helpers/compile'
+import { throttledRecompile, stopCompile } from './helpers/compile'
import { v4 as uuid } from 'uuid'
import { waitUntilScrollingFinished } from './helpers/waitUntilScrollingFinished'
import { beforeWithReRunOnTestRetry } from './helpers/beforeWithReRunOnTestRetry'
@@ -10,9 +10,7 @@ const LABEL_TEX_LIVE_VERSION = 'TeX Live version'
describe('SandboxedCompiles', function () {
const enabledVars = {
- DOCKER_RUNNER: 'true',
SANDBOXED_COMPILES: 'true',
- SANDBOXED_COMPILES_SIBLING_CONTAINERS: 'true',
ALL_TEX_LIVE_DOCKER_IMAGE_NAMES: '2023,2022',
}
@@ -29,7 +27,6 @@ describe('SandboxedCompiles', function () {
})
it('should offer TexLive images and switch the compiler', function () {
- cy.visit('/project')
createProject('sandboxed')
const recompile = throttledRecompile()
cy.log('wait for compile')
@@ -46,7 +43,7 @@ describe('SandboxedCompiles', function () {
.findByText('2023')
.parent()
.select('2022')
- cy.get('#left-menu-modal').click()
+ cy.get('.left-menu-modal-backdrop').click()
cy.log('Trigger compile with other TeX Live version')
recompile()
@@ -59,14 +56,47 @@ describe('SandboxedCompiles', function () {
checkSyncTeX()
checkXeTeX()
checkRecompilesAfterErrors()
+ checkStopCompile()
})
+ function checkStopCompile() {
+ it('users can stop a running compile', function () {
+ login('user@example.com')
+ createProject('test-project')
+ // create an infinite loop in the main document
+ // this will cause the compile to run indefinitely
+ cy.findByText('\\maketitle').parent().click()
+ cy.findByText('\\maketitle')
+ .parent()
+ .type('\n\\def\\x{{}Hello!\\par\\x}\\x')
+ cy.log('Start compile')
+ // We need to start the compile manually because we do not want to wait for it to finish
+ cy.findByText('Recompile').click()
+ // Now stop the compile and kill the latex process
+ stopCompile({ delay: 1000 })
+ cy.get('.logs-pane')
+ .invoke('text')
+ .should('match', /PDF Rendering Error|Compilation cancelled/)
+ // Check that the previous compile is not running in the background by
+ // disabling the infinite loop and recompiling
+ cy.findByText('\\def').parent().click()
+ cy.findByText('\\def').parent().type('{home}disabled loop% ')
+ cy.findByText('Recompile').click()
+ cy.get('.pdf-viewer').should('contain.text', 'disabled loop')
+ cy.get('.logs-pane').should(
+ 'not.contain.text',
+ 'A previous compile is still running'
+ )
+ })
+ }
+
function checkSyncTeX() {
- describe('SyncTeX', function () {
+ // TODO(25342): re-enable
+ // eslint-disable-next-line mocha/no-skipped-tests
+ describe.skip('SyncTeX', function () {
let projectName: string
beforeEach(function () {
projectName = `Project ${uuid()}`
- cy.visit('/project')
createProject(projectName)
const recompile = throttledRecompile()
cy.findByText('\\maketitle').parent().click()
@@ -131,7 +161,9 @@ describe('SandboxedCompiles', function () {
})
cy.log('navigate to Section A')
- cy.get('.cm-content').within(() => cy.findByText('Section A').click())
+ cy.findByRole('textbox', { name: /Source Editor editing/i }).within(
+ () => cy.findByText('Section A').click()
+ )
cy.get('[aria-label="Go to code location in PDF"]').click()
cy.get('@title').then((title: any) => {
waitUntilScrollingFinished('.pdfjs-viewer-inner', title)
@@ -140,7 +172,9 @@ describe('SandboxedCompiles', function () {
})
cy.log('navigate to Section B')
- cy.get('.cm-content').within(() => cy.findByText('Section B').click())
+ cy.findByRole('textbox', { name: /Source Editor editing/i }).within(
+ () => cy.findByText('Section B').click()
+ )
cy.get('[aria-label="Go to code location in PDF"]').click()
cy.get('@sectionA').then((title: any) => {
waitUntilScrollingFinished('.pdfjs-viewer-inner', title)
@@ -154,7 +188,6 @@ describe('SandboxedCompiles', function () {
function checkRecompilesAfterErrors() {
it('recompiles even if there are Latex errors', function () {
login('user@example.com')
- cy.visit('/project')
createProject('test-project')
const recompile = throttledRecompile()
cy.findByText('\\maketitle').parent().click()
@@ -170,7 +203,6 @@ describe('SandboxedCompiles', function () {
function checkXeTeX() {
it('should be able to use XeLaTeX', function () {
- cy.visit('/project')
createProject('XeLaTeX')
const recompile = throttledRecompile()
cy.log('wait for compile')
@@ -187,7 +219,7 @@ describe('SandboxedCompiles', function () {
.findByText('pdfLaTeX')
.parent()
.select('XeLaTeX')
- cy.get('#left-menu-modal').click()
+ cy.get('.left-menu-modal-backdrop').click()
cy.log('Trigger compile with other compiler')
recompile()
@@ -204,14 +236,13 @@ describe('SandboxedCompiles', function () {
})
it('should not offer TexLive images and use default compiler', function () {
- cy.visit('/project')
createProject('sandboxed')
cy.log('wait for compile')
cy.get('.pdf-viewer').should('contain.text', 'sandboxed')
- cy.log('Check which compiler version was used, expect 2024')
+ cy.log('Check which compiler version was used, expect 2025')
cy.get('[aria-label="View logs"]').click()
- cy.findByText(/This is pdfTeX, Version .+ \(TeX Live 2024\) /)
+ cy.findByText(/This is pdfTeX, Version .+ \(TeX Live 2025\) /)
cy.log('Check that there is no TeX Live version toggle')
cy.get('header').findByText('Menu').click()
@@ -232,6 +263,7 @@ describe('SandboxedCompiles', function () {
checkSyncTeX()
checkXeTeX()
checkRecompilesAfterErrors()
+ checkStopCompile()
})
describe.skip('unavailable in CE', function () {
@@ -246,5 +278,6 @@ describe('SandboxedCompiles', function () {
checkSyncTeX()
checkXeTeX()
checkRecompilesAfterErrors()
+ checkStopCompile()
})
})
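
`stopCompile` is imported from `./helpers/compile`, whose diff is not shown here; a plausible sketch, assuming the stop action is exposed next to the Recompile button (button and menu labels below are assumptions):

```ts
// Sketch of the assumed helper in ./helpers/compile (not shown in this diff).
export function stopCompile({ delay = 0 }: { delay?: number } = {}) {
  if (delay > 0) {
    cy.wait(delay) // give the compile time to actually start before cancelling
  }
  // Assumption: the stop action sits in the compile button's dropdown.
  cy.findByRole('button', { name: 'Toggle compile options menu' }).click()
  cy.findByRole('menuitem', { name: 'Stop compilation' }).click()
}
```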
diff --git a/server-ce/test/templates.spec.ts b/server-ce/test/templates.spec.ts
index b4ea393728..4959e149fc 100644
--- a/server-ce/test/templates.spec.ts
+++ b/server-ce/test/templates.spec.ts
@@ -47,7 +47,9 @@ describe('Templates', () => {
cy.url().should('match', /\/templates$/)
})
- it('should have templates feature', () => {
+ // TODO(25342): re-enable
+ // eslint-disable-next-line mocha/no-skipped-tests
+ it.skip('should have templates feature', () => {
login(TEMPLATES_USER)
const name = `Template ${Date.now()}`
const description = `Template Description ${Date.now()}`
@@ -64,7 +66,7 @@ describe('Templates', () => {
.get('textarea')
.type(description)
cy.findByText('Publish').click()
- cy.findByText('Publishing…').should('be.disabled')
+ cy.findByText('Publishing…').parent().should('be.disabled')
cy.findByText('Publish').should('not.exist')
cy.findByText('Unpublish', { timeout: 10_000 })
cy.findByText('Republish')
@@ -96,12 +98,12 @@ describe('Templates', () => {
.parent()
.parent()
.within(() => cy.get('input[type="checkbox"]').first().check())
- cy.get('.project-list-sidebar-react').within(() => {
+ cy.get('.project-list-sidebar-scroll').within(() => {
cy.findAllByText('New Tag').first().click()
})
cy.focused().type(tagName)
cy.findByText('Create').click()
- cy.get('.project-list-sidebar-react').within(() => {
+ cy.get('.project-list-sidebar-scroll').within(() => {
cy.findByText(tagName)
.parent()
.within(() => cy.get('.name').should('have.text', `${tagName} (1)`))
diff --git a/server-ce/test/upgrading.spec.ts b/server-ce/test/upgrading.spec.ts
index 86a3ea0cad..16e0320dcc 100644
--- a/server-ce/test/upgrading.spec.ts
+++ b/server-ce/test/upgrading.spec.ts
@@ -1,7 +1,7 @@
import { ensureUserExists, login } from './helpers/login'
import { isExcludedBySharding, startWith } from './helpers/config'
import { dockerCompose, runScript } from './helpers/hostAdminClient'
-import { createProject } from './helpers/project'
+import { createProject, openProjectByName } from './helpers/project'
import { throttledRecompile } from './helpers/compile'
import { v4 as uuid } from 'uuid'
@@ -38,8 +38,6 @@ describe('Upgrading', function () {
before(() => {
cy.log('Populate old instance')
login(USER)
-
- cy.visit('/project')
createProject(PROJECT_NAME, {
newProjectButtonMatcher: startOptions.newProjectButtonMatcher,
})
@@ -59,7 +57,7 @@ describe('Upgrading', function () {
recompile()
cy.get('header').findByText('Menu').click()
cy.findByText('Source').click()
- cy.get('#left-menu-modal').click()
+ cy.get('.left-menu-modal-backdrop').click({ force: true })
}
cy.log('Check compile and history')
@@ -115,8 +113,7 @@ describe('Upgrading', function () {
})
it('should open the old project', () => {
- cy.visit('/project')
- cy.findByText(PROJECT_NAME).click()
+ openProjectByName(PROJECT_NAME)
cy.url().should('match', /\/project\/[a-fA-F0-9]{24}/)
cy.findByRole('navigation').within(() => {
diff --git a/services/chat/.gitignore b/services/chat/.gitignore
deleted file mode 100644
index f0cf94b147..0000000000
--- a/services/chat/.gitignore
+++ /dev/null
@@ -1,12 +0,0 @@
-**.swp
-
-public/build/
-
-node_modules/
-
-plato/
-
-**/*.map
-
-# managed by dev-environment$ bin/update_build_scripts
-.npmrc
diff --git a/services/chat/.nvmrc b/services/chat/.nvmrc
index 2a393af592..fc37597bcc 100644
--- a/services/chat/.nvmrc
+++ b/services/chat/.nvmrc
@@ -1 +1 @@
-20.18.0
+22.17.0
diff --git a/services/chat/Dockerfile b/services/chat/Dockerfile
index 2d906e4f4a..66a8bc3ded 100644
--- a/services/chat/Dockerfile
+++ b/services/chat/Dockerfile
@@ -2,7 +2,7 @@
# Instead run bin/update_build_scripts from
# https://github.com/overleaf/internal/
-FROM node:20.18.0 AS base
+FROM node:22.17.0 AS base
WORKDIR /overleaf/services/chat
diff --git a/services/chat/Makefile b/services/chat/Makefile
index 0bcab97c32..792f5d2cd6 100644
--- a/services/chat/Makefile
+++ b/services/chat/Makefile
@@ -32,12 +32,30 @@ HERE=$(shell pwd)
MONOREPO=$(shell cd ../../ && pwd)
# Run the linting commands in the scope of the monorepo.
# Eslint and prettier (plus some configs) are on the root.
-RUN_LINTING = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(HERE) node:20.18.0 npm run --silent
+RUN_LINTING = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(HERE) node:22.17.0 npm run --silent
RUN_LINTING_CI = docker run --rm --volume $(MONOREPO)/.editorconfig:/overleaf/.editorconfig --volume $(MONOREPO)/.eslintignore:/overleaf/.eslintignore --volume $(MONOREPO)/.eslintrc:/overleaf/.eslintrc --volume $(MONOREPO)/.prettierignore:/overleaf/.prettierignore --volume $(MONOREPO)/.prettierrc:/overleaf/.prettierrc --volume $(MONOREPO)/tsconfig.backend.json:/overleaf/tsconfig.backend.json ci/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER) npm run --silent
# Same but from the top of the monorepo
-RUN_LINTING_MONOREPO = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(MONOREPO) node:20.18.0 npm run --silent
+RUN_LINTING_MONOREPO = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(MONOREPO) node:22.17.0 npm run --silent
+
+SHELLCHECK_OPTS = \
+ --shell=bash \
+ --external-sources
+SHELLCHECK_COLOR := $(if $(CI),--color=never,--color)
+SHELLCHECK_FILES := { git ls-files "*.sh" -z; git grep -Plz "\A\#\!.*bash"; } | sort -zu
+
+shellcheck:
+ @$(SHELLCHECK_FILES) | xargs -0 -r docker run --rm -v $(HERE):/mnt -w /mnt \
+ koalaman/shellcheck:stable $(SHELLCHECK_OPTS) $(SHELLCHECK_COLOR)
+
+shellcheck_fix:
+ @$(SHELLCHECK_FILES) | while IFS= read -r -d '' file; do \
+ diff=$$(docker run --rm -v $(HERE):/mnt -w /mnt koalaman/shellcheck:stable $(SHELLCHECK_OPTS) --format=diff "$$file" 2>/dev/null); \
+ if [ -n "$$diff" ] && ! echo "$$diff" | patch -p1 >/dev/null 2>&1; then echo "\033[31m$$file\033[0m"; \
+ elif [ -n "$$diff" ]; then echo "$$file"; \
+ else echo "\033[2m$$file\033[0m"; fi \
+ done
format:
$(RUN_LINTING) format
@@ -63,7 +81,7 @@ typecheck:
typecheck_ci:
$(RUN_LINTING_CI) types:check
-test: format lint typecheck test_unit test_acceptance
+test: format lint typecheck shellcheck test_unit test_acceptance
test_unit:
ifneq (,$(wildcard test/unit))
@@ -98,13 +116,6 @@ test_acceptance_clean:
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) down -v -t 0
test_acceptance_pre_run:
- $(DOCKER_COMPOSE_TEST_ACCEPTANCE) up -d mongo
- $(DOCKER_COMPOSE_TEST_ACCEPTANCE) exec -T mongo sh -c ' \
- while ! mongosh --eval "db.version()" > /dev/null; do \
- echo "Waiting for Mongo..."; \
- sleep 1; \
- done; \
- mongosh --eval "rs.initiate({ _id: \"overleaf\", members: [ { _id: 0, host: \"mongo:27017\" } ] })"'
ifneq (,$(wildcard test/acceptance/js/scripts/pre-run))
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) run --rm test_acceptance test/acceptance/js/scripts/pre-run
endif
@@ -137,6 +148,7 @@ publish:
lint lint_fix \
build_types typecheck \
lint_ci format_ci typecheck_ci \
+ shellcheck shellcheck_fix \
test test_clean test_unit test_unit_clean \
test_acceptance test_acceptance_debug test_acceptance_pre_run \
test_acceptance_run test_acceptance_run_debug test_acceptance_clean \
diff --git a/services/chat/app/js/Features/Messages/MessageHttpController.js b/services/chat/app/js/Features/Messages/MessageHttpController.js
index a20d005864..45208e2c03 100644
--- a/services/chat/app/js/Features/Messages/MessageHttpController.js
+++ b/services/chat/app/js/Features/Messages/MessageHttpController.js
@@ -74,6 +74,10 @@ export async function deleteMessage(context) {
return await callMessageHttpController(context, _deleteMessage)
}
+export async function deleteUserMessage(context) {
+ return await callMessageHttpController(context, _deleteUserMessage)
+}
+
export async function getResolvedThreadIds(context) {
return await callMessageHttpController(context, _getResolvedThreadIds)
}
@@ -190,6 +194,13 @@ const _deleteMessage = async (req, res) => {
res.status(204)
}
+const _deleteUserMessage = async (req, res) => {
+ const { projectId, threadId, userId, messageId } = req.params
+ const room = await ThreadManager.findOrCreateThread(projectId, threadId)
+ await MessageManager.deleteUserMessage(userId, room._id, messageId)
+ res.status(204)
+}
+
const _getResolvedThreadIds = async (req, res) => {
const { projectId } = req.params
const resolvedThreadIds = await ThreadManager.getResolvedThreadIds(projectId)
diff --git a/services/chat/app/js/Features/Messages/MessageManager.js b/services/chat/app/js/Features/Messages/MessageManager.js
index cb8818e3b6..efff22a2a4 100644
--- a/services/chat/app/js/Features/Messages/MessageManager.js
+++ b/services/chat/app/js/Features/Messages/MessageManager.js
@@ -77,6 +77,14 @@ export async function deleteMessage(roomId, messageId) {
await db.messages.deleteOne(query)
}
+export async function deleteUserMessage(userId, roomId, messageId) {
+ await db.messages.deleteOne({
+ _id: new ObjectId(messageId),
+ user_id: new ObjectId(userId),
+ room_id: new ObjectId(roomId),
+ })
+}
+
function _ensureIdsAreObjectIds(query) {
if (query.user_id && !(query.user_id instanceof ObjectId)) {
query.user_id = new ObjectId(query.user_id)
diff --git a/services/chat/buildscript.txt b/services/chat/buildscript.txt
index cdd8eda974..35114bd2a4 100644
--- a/services/chat/buildscript.txt
+++ b/services/chat/buildscript.txt
@@ -4,6 +4,6 @@ chat
--env-add=
--env-pass-through=
--esmock-loader=False
---node-version=20.18.0
+--node-version=22.17.0
--public-repo=False
---script-version=4.5.0
+--script-version=4.7.0
diff --git a/services/chat/chat.yaml b/services/chat/chat.yaml
index 3ccdf9bc30..35ed3d378d 100644
--- a/services/chat/chat.yaml
+++ b/services/chat/chat.yaml
@@ -177,6 +177,34 @@ paths:
'204':
description: No Content
description: 'Delete message with Message ID provided, from the Thread with ThreadID and ProjectID provided'
+ '/project/{projectId}/thread/{threadId}/user/{userId}/messages/{messageId}':
+ parameters:
+ - schema:
+ type: string
+ name: projectId
+ in: path
+ required: true
+ - schema:
+ type: string
+ name: threadId
+ in: path
+ required: true
+ - schema:
+ type: string
+ name: userId
+ in: path
+ required: true
+ - schema:
+ type: string
+ name: messageId
+ in: path
+ required: true
+ delete:
+ summary: Delete message written by a given user
+ operationId: deleteUserMessage
+ responses:
+ '204':
+ description: No Content
'/project/{projectId}/thread/{threadId}/resolve':
parameters:
- schema:
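
For reference, a hedged sketch of how a client could call the new route declared above; the chat service base URL is an assumption (in the monorepo it would come from settings):

```ts
// Sketch: deleting a message on behalf of its author via the new chat route.
// The filter in MessageManager.deleteUserMessage means only a message owned by
// userId in that thread is removed; anything else is a silent no-op.
async function deleteUserMessage(
  chatApiUrl: string,
  projectId: string,
  threadId: string,
  userId: string,
  messageId: string
) {
  const url = `${chatApiUrl}/project/${projectId}/thread/${threadId}/user/${userId}/messages/${messageId}`
  const response = await fetch(url, { method: 'DELETE' })
  if (response.status !== 204) {
    throw new Error(`unexpected status ${response.status} deleting message`)
  }
}
```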
diff --git a/services/chat/docker-compose.ci.yml b/services/chat/docker-compose.ci.yml
index 6f1a608534..ca3303a079 100644
--- a/services/chat/docker-compose.ci.yml
+++ b/services/chat/docker-compose.ci.yml
@@ -24,10 +24,13 @@ services:
MOCHA_GREP: ${MOCHA_GREP}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
+ volumes:
+ - ../../bin/shared/wait_for_it:/overleaf/bin/shared/wait_for_it
depends_on:
mongo:
- condition: service_healthy
+ condition: service_started
user: node
+ entrypoint: /overleaf/bin/shared/wait_for_it mongo:27017 --timeout=0 --
command: npm run test:acceptance
@@ -39,9 +42,14 @@ services:
command: tar -czf /tmp/build/build.tar.gz --exclude=build.tar.gz --exclude-vcs .
user: root
mongo:
- image: mongo:6.0.13
+ image: mongo:8.0.11
command: --replSet overleaf
- healthcheck:
- test: "mongosh --quiet localhost/test --eval 'quit(db.runCommand({ ping: 1 }).ok ? 0 : 1)'"
- interval: 1s
- retries: 20
+ volumes:
+ - ../../bin/shared/mongodb-init-replica-set.js:/docker-entrypoint-initdb.d/mongodb-init-replica-set.js
+ environment:
+ MONGO_INITDB_DATABASE: sharelatex
+ extra_hosts:
+ # Required when using the automatic database setup for initializing the
+ # replica set. This override is not needed when running the setup after
+ # starting up mongo.
+ - mongo:127.0.0.1
diff --git a/services/chat/docker-compose.yml b/services/chat/docker-compose.yml
index 2bc8293c63..e7b8ce7385 100644
--- a/services/chat/docker-compose.yml
+++ b/services/chat/docker-compose.yml
@@ -6,7 +6,7 @@ version: "2.3"
services:
test_unit:
- image: node:20.18.0
+ image: node:22.17.0
volumes:
- .:/overleaf/services/chat
- ../../node_modules:/overleaf/node_modules
@@ -14,37 +14,45 @@ services:
working_dir: /overleaf/services/chat
environment:
MOCHA_GREP: ${MOCHA_GREP}
+ LOG_LEVEL: ${LOG_LEVEL:-}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
command: npm run --silent test:unit
user: node
test_acceptance:
- image: node:20.18.0
+ image: node:22.17.0
volumes:
- .:/overleaf/services/chat
- ../../node_modules:/overleaf/node_modules
- ../../libraries:/overleaf/libraries
+ - ../../bin/shared/wait_for_it:/overleaf/bin/shared/wait_for_it
working_dir: /overleaf/services/chat
environment:
ELASTIC_SEARCH_DSN: es:9200
MONGO_HOST: mongo
POSTGRES_HOST: postgres
MOCHA_GREP: ${MOCHA_GREP}
- LOG_LEVEL: ERROR
+ LOG_LEVEL: ${LOG_LEVEL:-}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
user: node
depends_on:
mongo:
- condition: service_healthy
+ condition: service_started
+ entrypoint: /overleaf/bin/shared/wait_for_it mongo:27017 --timeout=0 --
command: npm run --silent test:acceptance
mongo:
- image: mongo:6.0.13
+ image: mongo:8.0.11
command: --replSet overleaf
- healthcheck:
- test: "mongosh --quiet localhost/test --eval 'quit(db.runCommand({ ping: 1 }).ok ? 0 : 1)'"
- interval: 1s
- retries: 20
+ volumes:
+ - ../../bin/shared/mongodb-init-replica-set.js:/docker-entrypoint-initdb.d/mongodb-init-replica-set.js
+ environment:
+ MONGO_INITDB_DATABASE: sharelatex
+ extra_hosts:
+ # Required when using the automatic database setup for initializing the
+ # replica set. This override is not needed when running the setup after
+ # starting up mongo.
+ - mongo:127.0.0.1
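
Both compose files now rely on `bin/shared/mongodb-init-replica-set.js` instead of the `mongosh` loop removed from the Makefile's `test_acceptance_pre_run` target; a sketch of what that init script is expected to do, mirroring the removed command:

```js
// Sketch of bin/shared/mongodb-init-replica-set.js (not shown in this diff),
// mirroring the rs.initiate call removed from the Makefile above.
// mongod runs scripts in /docker-entrypoint-initdb.d on first start, which is
// why the mongo:127.0.0.1 extra_hosts entry is needed for the member hostname.
rs.initiate({
  _id: 'overleaf',
  members: [{ _id: 0, host: 'mongo:27017' }],
})
```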
diff --git a/services/chat/package.json b/services/chat/package.json
index 1540137434..f3d37eb6d3 100644
--- a/services/chat/package.json
+++ b/services/chat/package.json
@@ -24,15 +24,15 @@
"async": "^3.2.5",
"body-parser": "^1.20.3",
"exegesis-express": "^4.0.0",
- "express": "^4.21.0",
- "mongodb": "6.10.0"
+ "express": "^4.21.2",
+ "mongodb": "6.12.0"
},
"devDependencies": {
"acorn": "^7.1.1",
"ajv": "^6.12.0",
"chai": "^4.3.6",
"chai-as-promised": "^7.1.1",
- "mocha": "^10.2.0",
+ "mocha": "^11.1.0",
"request": "^2.88.2",
"sandboxed-module": "^2.0.4",
"sinon": "^9.2.4",
diff --git a/services/clsi/.gitignore b/services/clsi/.gitignore
index 360466227e..a85e6b757a 100644
--- a/services/clsi/.gitignore
+++ b/services/clsi/.gitignore
@@ -1,14 +1,3 @@
-**.swp
-node_modules
-test/acceptance/fixtures/tmp
compiles
output
-.DS_Store
-*~
cache
-.vagrant
-config/*
-npm-debug.log
-
-# managed by dev-environment$ bin/update_build_scripts
-.npmrc
diff --git a/services/clsi/.nvmrc b/services/clsi/.nvmrc
index 2a393af592..fc37597bcc 100644
--- a/services/clsi/.nvmrc
+++ b/services/clsi/.nvmrc
@@ -1 +1 @@
-20.18.0
+22.17.0
diff --git a/services/clsi/Dockerfile b/services/clsi/Dockerfile
index 48cd858817..77c26fab23 100644
--- a/services/clsi/Dockerfile
+++ b/services/clsi/Dockerfile
@@ -2,7 +2,7 @@
# Instead run bin/update_build_scripts from
# https://github.com/overleaf/internal/
-FROM node:20.18.0 AS base
+FROM node:22.17.0 AS base
WORKDIR /overleaf/services/clsi
COPY services/clsi/install_deps.sh /overleaf/services/clsi/
diff --git a/services/clsi/Makefile b/services/clsi/Makefile
index a744b12b2c..e02697f4e9 100644
--- a/services/clsi/Makefile
+++ b/services/clsi/Makefile
@@ -24,7 +24,6 @@ DOCKER_COMPOSE_TEST_UNIT = \
clean:
-docker rmi ci/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER)
- -docker rmi gcr.io/overleaf-ops/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER)
-docker rmi us-east1-docker.pkg.dev/overleaf-ops/ol-docker/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER)
-$(DOCKER_COMPOSE_TEST_UNIT) down --rmi local
-$(DOCKER_COMPOSE_TEST_ACCEPTANCE) down --rmi local
@@ -33,12 +32,30 @@ HERE=$(shell pwd)
MONOREPO=$(shell cd ../../ && pwd)
# Run the linting commands in the scope of the monorepo.
# Eslint and prettier (plus some configs) are on the root.
-RUN_LINTING = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(HERE) node:20.18.0 npm run --silent
+RUN_LINTING = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(HERE) node:22.17.0 npm run --silent
RUN_LINTING_CI = docker run --rm --volume $(MONOREPO)/.editorconfig:/overleaf/.editorconfig --volume $(MONOREPO)/.eslintignore:/overleaf/.eslintignore --volume $(MONOREPO)/.eslintrc:/overleaf/.eslintrc --volume $(MONOREPO)/.prettierignore:/overleaf/.prettierignore --volume $(MONOREPO)/.prettierrc:/overleaf/.prettierrc --volume $(MONOREPO)/tsconfig.backend.json:/overleaf/tsconfig.backend.json ci/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER) npm run --silent
# Same but from the top of the monorepo
-RUN_LINTING_MONOREPO = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(MONOREPO) node:20.18.0 npm run --silent
+RUN_LINTING_MONOREPO = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(MONOREPO) node:22.17.0 npm run --silent
+
+SHELLCHECK_OPTS = \
+ --shell=bash \
+ --external-sources
+SHELLCHECK_COLOR := $(if $(CI),--color=never,--color)
+SHELLCHECK_FILES := { git ls-files "*.sh" -z; git grep -Plz "\A\#\!.*bash"; } | sort -zu
+
+shellcheck:
+ @$(SHELLCHECK_FILES) | xargs -0 -r docker run --rm -v $(HERE):/mnt -w /mnt \
+ koalaman/shellcheck:stable $(SHELLCHECK_OPTS) $(SHELLCHECK_COLOR)
+
+shellcheck_fix:
+ @$(SHELLCHECK_FILES) | while IFS= read -r -d '' file; do \
+ diff=$$(docker run --rm -v $(HERE):/mnt -w /mnt koalaman/shellcheck:stable $(SHELLCHECK_OPTS) --format=diff "$$file" 2>/dev/null); \
+ if [ -n "$$diff" ] && ! echo "$$diff" | patch -p1 >/dev/null 2>&1; then echo "\033[31m$$file\033[0m"; \
+ elif [ -n "$$diff" ]; then echo "$$file"; \
+ else echo "\033[2m$$file\033[0m"; fi \
+ done
format:
$(RUN_LINTING) format
@@ -64,7 +81,7 @@ typecheck:
typecheck_ci:
$(RUN_LINTING_CI) types:check
-test: format lint typecheck test_unit test_acceptance
+test: format lint typecheck shellcheck test_unit test_acceptance
test_unit:
ifneq (,$(wildcard test/unit))
@@ -111,11 +128,10 @@ build:
--pull \
--build-arg BUILDKIT_INLINE_CACHE=1 \
--tag ci/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER) \
- --tag gcr.io/overleaf-ops/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER) \
- --tag gcr.io/overleaf-ops/$(PROJECT_NAME):$(BRANCH_NAME) \
- --cache-from gcr.io/overleaf-ops/$(PROJECT_NAME):$(BRANCH_NAME) \
- --cache-from gcr.io/overleaf-ops/$(PROJECT_NAME):main \
--tag us-east1-docker.pkg.dev/overleaf-ops/ol-docker/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER) \
+ --tag us-east1-docker.pkg.dev/overleaf-ops/ol-docker/$(PROJECT_NAME):$(BRANCH_NAME) \
+ --cache-from us-east1-docker.pkg.dev/overleaf-ops/ol-docker/$(PROJECT_NAME):$(BRANCH_NAME) \
+ --cache-from us-east1-docker.pkg.dev/overleaf-ops/ol-docker/$(PROJECT_NAME):main \
--file Dockerfile \
../..
@@ -132,6 +148,7 @@ publish:
lint lint_fix \
build_types typecheck \
lint_ci format_ci typecheck_ci \
+ shellcheck shellcheck_fix \
test test_clean test_unit test_unit_clean \
test_acceptance test_acceptance_debug test_acceptance_pre_run \
test_acceptance_run test_acceptance_run_debug test_acceptance_clean \
diff --git a/services/clsi/README.md b/services/clsi/README.md
index 33a9c95c1c..f1cf927d3d 100644
--- a/services/clsi/README.md
+++ b/services/clsi/README.md
@@ -19,17 +19,18 @@ The CLSI can be configured through the following environment variables:
* `ALLOWED_IMAGES` - Space separated list of allowed Docker TeX Live images
* `CATCH_ERRORS` - Set to `true` to log uncaught exceptions
* `COMPILE_GROUP_DOCKER_CONFIGS` - JSON string of Docker configs for compile groups
-* `COMPILES_HOST_DIR` - Working directory for LaTeX compiles
+* `SANDBOXED_COMPILES` - Set to true to use sibling containers
+* `SANDBOXED_COMPILES_HOST_DIR_COMPILES` - Working directory for LaTeX compiles
+* `SANDBOXED_COMPILES_HOST_DIR_OUTPUT` - Output directory for LaTeX compiles
* `COMPILE_SIZE_LIMIT` - Sets the body-parser [limit](https://github.com/expressjs/body-parser#limit)
-* `DOCKER_RUNNER` - Set to true to use sibling containers
* `DOCKER_RUNTIME` -
* `FILESTORE_DOMAIN_OVERRIDE` - The url for the filestore service e.g.`http://$FILESTORE_HOST:3009`
* `FILESTORE_PARALLEL_FILE_DOWNLOADS` - Number of parallel file downloads
* `LISTEN_ADDRESS` - The address for the RESTful service to listen on. Set to `0.0.0.0` to listen on all network interfaces
* `PROCESS_LIFE_SPAN_LIMIT_MS` - Process life span limit in milliseconds
* `SMOKE_TEST` - Whether to run smoke tests
-* `TEXLIVE_IMAGE` - The TeX Live Docker image to use for sibling containers, e.g. `gcr.io/overleaf-ops/texlive-full:2017.1`
-* `TEX_LIVE_IMAGE_NAME_OVERRIDE` - The name of the registry for the Docker image e.g. `gcr.io/overleaf-ops`
+* `TEXLIVE_IMAGE` - The TeX Live Docker image to use for sibling containers, e.g. `us-east1-docker.pkg.dev/overleaf-ops/ol-docker/texlive-full:2017.1`
+* `TEX_LIVE_IMAGE_NAME_OVERRIDE` - The name of the registry for the Docker image, e.g. `us-east1-docker.pkg.dev/overleaf-ops/ol-docker`
* `TEXLIVE_IMAGE_USER` - When using sibling containers, the user to run as in the TeX Live image. Defaults to `tex`
* `TEXLIVE_OPENOUT_ANY` - Sets the `openout_any` environment variable for TeX Live (see the `\openout` primitive [documentation](http://tug.org/texinfohtml/web2c.html#tex-invocation))
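
> The renamed variables above map onto the settings resolution added to `config/settings.defaults.js` later in this diff. As a quick reference, a minimal sketch of that fallback chain (sketch only, not part of the patch; the legacy names are the ones being replaced):

```js
// Sketch: how the sandboxed-compiles settings resolve, mirroring
// config/settings.defaults.js further down in this diff.
const sandboxedCompiles =
  (process.env.DOCKER_RUNNER || process.env.SANDBOXED_COMPILES) === 'true'

const hostDirCompiles =
  process.env.SANDBOXED_COMPILES_HOST_DIR_COMPILES ||
  process.env.SANDBOXED_COMPILES_HOST_DIR || // legacy
  process.env.COMPILES_HOST_DIR // legacy

const hostDirOutput =
  process.env.SANDBOXED_COMPILES_HOST_DIR_OUTPUT ||
  process.env.OUTPUT_HOST_DIR // legacy

console.log({ sandboxedCompiles, hostDirCompiles, hostDirOutput })
```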
@@ -62,10 +63,10 @@ Then start the Docker container:
docker run --rm \
-p 127.0.0.1:3013:3013 \
-e LISTEN_ADDRESS=0.0.0.0 \
- -e DOCKER_RUNNER=true \
+ -e SANDBOXED_COMPILES=true \
-e TEXLIVE_IMAGE=texlive/texlive \
-e TEXLIVE_IMAGE_USER=root \
- -e COMPILES_HOST_DIR="$PWD/compiles" \
+ -e SANDBOXED_COMPILES_HOST_DIR_COMPILES="$PWD/compiles" \
-v "$PWD/compiles:/overleaf/services/clsi/compiles" \
-v "$PWD/cache:/overleaf/services/clsi/cache" \
-v /var/run/docker.sock:/var/run/docker.sock \
diff --git a/services/clsi/app.js b/services/clsi/app.js
index f132eb00f6..872f612d9c 100644
--- a/services/clsi/app.js
+++ b/services/clsi/app.js
@@ -128,26 +128,6 @@ const ForbidSymlinks = require('./app/js/StaticServerForbidSymlinks')
// create a static server which does not allow access to any symlinks
// avoids possible mismatch of root directory between middleware check
// and serving the files
-const staticCompileServer = ForbidSymlinks(
- express.static,
- Settings.path.compilesDir,
- {
- setHeaders(res, path, stat) {
- if (Path.basename(path) === 'output.pdf') {
- // Calculate an etag in the same way as nginx
- // https://github.com/tj/send/issues/65
- const etag = (path, stat) =>
- `"${Math.ceil(+stat.mtime / 1000).toString(16)}` +
- '-' +
- Number(stat.size).toString(16) +
- '"'
- res.set('Etag', etag(path, stat))
- }
- res.set('Content-Type', ContentTypeMapper.map(path))
- },
- }
-)
-
const staticOutputServer = ForbidSymlinks(
express.static,
Settings.path.outputDir,
@@ -213,32 +193,6 @@ app.get(
}
)
-app.get(
- '/project/:project_id/user/:user_id/output/*',
- function (req, res, next) {
- // for specific user get the path to the top level file
- logger.warn(
- { url: req.url },
- 'direct request for file in compile directory'
- )
- req.url = `/${req.params.project_id}-${req.params.user_id}/${req.params[0]}`
- staticCompileServer(req, res, next)
- }
-)
-
-app.get('/project/:project_id/output/*', function (req, res, next) {
- logger.warn({ url: req.url }, 'direct request for file in compile directory')
- if (req.query?.build?.match(OutputCacheManager.BUILD_REGEX)) {
- // for specific build get the path from the OutputCacheManager (e.g. .clsi/buildId)
- req.url =
- `/${req.params.project_id}/` +
- OutputCacheManager.path(req.query.build, `/${req.params[0]}`)
- } else {
- req.url = `/${req.params.project_id}/${req.params[0]}`
- }
- staticCompileServer(req, res, next)
-})
-
app.get('/oops', function (req, res, next) {
logger.error({ err: 'hello' }, 'test error')
res.send('error\n')
@@ -295,6 +249,9 @@ app.get('/health_check', function (req, res) {
if (Settings.processTooOld) {
return res.status(500).json({ processTooOld: true })
}
+ if (ProjectPersistenceManager.isAnyDiskCriticalLow()) {
+ return res.status(500).json({ diskCritical: true })
+ }
smokeTest.sendLastResult(res)
})
@@ -304,6 +261,8 @@ app.use(function (error, req, res, next) {
if (error instanceof Errors.NotFoundError) {
logger.debug({ err: error, url: req.url }, 'not found error')
res.sendStatus(404)
+ } else if (error instanceof Errors.InvalidParameter) {
+ res.status(400).send(error.message)
} else if (error.code === 'EPIPE') {
// inspect container returns EPIPE when shutting down
res.sendStatus(503) // send 503 Unavailable response
@@ -340,10 +299,18 @@ const loadTcpServer = net.createServer(function (socket) {
}
const freeLoad = availableWorkingCpus - currentLoad
- const freeLoadPercentage = Math.round(
- (freeLoad / availableWorkingCpus) * 100
- )
- if (freeLoadPercentage <= 0) {
+ let freeLoadPercentage = Math.round((freeLoad / availableWorkingCpus) * 100)
+ if (ProjectPersistenceManager.isAnyDiskCriticalLow()) {
+ freeLoadPercentage = 0
+ }
+ if (ProjectPersistenceManager.isAnyDiskLow()) {
+ freeLoadPercentage = freeLoadPercentage / 2
+ }
+
+ if (
+ Settings.internal.load_balancer_agent.allow_maintenance &&
+ freeLoadPercentage <= 0
+ ) {
// When its 0 the server is set to drain implicitly.
// Drain will move new projects to different servers.
// Drain will keep existing projects assigned to the same server.
@@ -351,7 +318,11 @@ const loadTcpServer = net.createServer(function (socket) {
socket.write(`maint, 0%\n`, 'ASCII')
} else {
// Ready will cancel the maint state.
- socket.write(`up, ready, ${freeLoadPercentage}%\n`, 'ASCII')
+ socket.write(`up, ready, ${Math.max(freeLoadPercentage, 1)}%\n`, 'ASCII')
+ if (freeLoadPercentage <= 0) {
+ // This metric records how often we would have gone into maintenance mode.
+ Metrics.inc('clsi-prevented-maint')
+ }
}
socket.end()
} else {
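
> In summary, the load-agent hunk above now reports as follows; a condensed sketch, assuming the `isAnyDiskLow`/`isAnyDiskCriticalLow` helpers and the `allow_maintenance` setting introduced elsewhere in this diff:

```js
// Sketch of the reporting decision in the load TCP server above.
function loadAgentReply({
  freeLoad, // availableWorkingCpus - currentLoad
  availableWorkingCpus,
  anyDiskCriticalLow,
  anyDiskLow,
  allowMaintenance,
}) {
  let freeLoadPercentage = Math.round((freeLoad / availableWorkingCpus) * 100)
  if (anyDiskCriticalLow) freeLoadPercentage = 0
  if (anyDiskLow) freeLoadPercentage = freeLoadPercentage / 2

  if (allowMaintenance && freeLoadPercentage <= 0) {
    // Drain: new projects go to other servers, existing ones stay here.
    return 'maint, 0%\n'
  }
  // With maintenance disabled, never report 0%; clamp to 1% instead.
  return `up, ready, ${Math.max(freeLoadPercentage, 1)}%\n`
}

// loadAgentReply({ freeLoad: 0, availableWorkingCpus: 8, anyDiskCriticalLow: false,
//   anyDiskLow: true, allowMaintenance: false }) === 'up, ready, 1%\n'
```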
diff --git a/services/clsi/app/js/CLSICacheHandler.js b/services/clsi/app/js/CLSICacheHandler.js
new file mode 100644
index 0000000000..26acd221f9
--- /dev/null
+++ b/services/clsi/app/js/CLSICacheHandler.js
@@ -0,0 +1,304 @@
+const crypto = require('node:crypto')
+const fs = require('node:fs')
+const Path = require('node:path')
+const { pipeline } = require('node:stream/promises')
+const { createGzip, createGunzip } = require('node:zlib')
+const tarFs = require('tar-fs')
+const _ = require('lodash')
+const {
+ fetchNothing,
+ fetchStream,
+ RequestFailedError,
+} = require('@overleaf/fetch-utils')
+const logger = require('@overleaf/logger')
+const Metrics = require('@overleaf/metrics')
+const Settings = require('@overleaf/settings')
+const { MeteredStream } = require('@overleaf/stream-utils')
+const { CACHE_SUBDIR } = require('./OutputCacheManager')
+const { isExtraneousFile } = require('./ResourceWriter')
+
+const TIMING_BUCKETS = [
+ 0, 10, 100, 1000, 2000, 5000, 10000, 15000, 20000, 30000,
+]
+const MAX_ENTRIES_IN_OUTPUT_TAR = 100
+const OBJECT_ID_REGEX = /^[0-9a-f]{24}$/
+
+/**
+ * @param {string} projectId
+ * @return {{shard: string, url: string}}
+ */
+function getShard(projectId) {
+  // ObjectId layout: [timestamp 4 bytes][random per machine 5 bytes][counter 3 bytes]
+  // Use the last 4 bytes, read as a 32-bit unsigned int, to pick a shard.
+ const last4Bytes = Buffer.from(projectId, 'hex').subarray(8, 12)
+ const idx = last4Bytes.readUInt32BE() % Settings.apis.clsiCache.shards.length
+ return Settings.apis.clsiCache.shards[idx]
+}
+
+/**
+ * @param {string} projectId
+ * @param {string} userId
+ * @param {string} buildId
+ * @param {string} editorId
+ * @param {[{path: string}]} outputFiles
+ * @param {string} compileGroup
+ * @param {Record<string, any>} options
+ * @return {string | undefined}
+ */
+function notifyCLSICacheAboutBuild({
+ projectId,
+ userId,
+ buildId,
+ editorId,
+ outputFiles,
+ compileGroup,
+ options,
+}) {
+ if (!Settings.apis.clsiCache.enabled) return undefined
+ if (!OBJECT_ID_REGEX.test(projectId)) return undefined
+ const { url, shard } = getShard(projectId)
+
+ /**
+ * @param {[{path: string}]} files
+ */
+ const enqueue = files => {
+ Metrics.count('clsi_cache_enqueue_files', files.length)
+ fetchNothing(`${url}/enqueue`, {
+ method: 'POST',
+ json: {
+ projectId,
+ userId,
+ buildId,
+ editorId,
+ files,
+ downloadHost: Settings.apis.clsi.downloadHost,
+ clsiServerId: Settings.apis.clsi.clsiServerId,
+ compileGroup,
+ options,
+ },
+ signal: AbortSignal.timeout(15_000),
+ }).catch(err => {
+ logger.warn(
+ { err, projectId, userId, buildId },
+ 'enqueue for clsi cache failed'
+ )
+ })
+ }
+
+ // PDF preview
+ enqueue(
+ outputFiles
+ .filter(
+ f =>
+ f.path === 'output.pdf' ||
+ f.path === 'output.log' ||
+ f.path === 'output.synctex.gz' ||
+ f.path.endsWith('.blg')
+ )
+ .map(f => {
+ if (f.path === 'output.pdf') {
+ return _.pick(f, 'path', 'size', 'contentId', 'ranges')
+ }
+ return _.pick(f, 'path')
+ })
+ )
+
+ // Compile Cache
+ buildTarball({ projectId, userId, buildId, outputFiles })
+ .then(() => {
+ enqueue([{ path: 'output.tar.gz' }])
+ })
+ .catch(err => {
+ logger.warn(
+ { err, projectId, userId, buildId },
+ 'build output.tar.gz for clsi cache failed'
+ )
+ })
+
+ return shard
+}
+
+/**
+ * @param {string} projectId
+ * @param {string} userId
+ * @param {string} buildId
+ * @param {[{path: string}]} outputFiles
+ * @return {Promise<void>}
+ */
+async function buildTarball({ projectId, userId, buildId, outputFiles }) {
+ const timer = new Metrics.Timer('clsi_cache_build', 1, {}, TIMING_BUCKETS)
+ const outputDir = Path.join(
+ Settings.path.outputDir,
+ userId ? `${projectId}-${userId}` : projectId,
+ CACHE_SUBDIR,
+ buildId
+ )
+
+ const files = outputFiles.filter(f => !isExtraneousFile(f.path))
+ if (files.length > MAX_ENTRIES_IN_OUTPUT_TAR) {
+ Metrics.inc('clsi_cache_build_too_many_entries')
+ throw new Error('too many output files for output.tar.gz')
+ }
+ Metrics.count('clsi_cache_build_files', files.length)
+
+ const path = Path.join(outputDir, 'output.tar.gz')
+ try {
+ await pipeline(
+ tarFs.pack(outputDir, { entries: files.map(f => f.path) }),
+ createGzip(),
+ fs.createWriteStream(path)
+ )
+ } catch (err) {
+ try {
+ await fs.promises.unlink(path)
+ } catch (e) {}
+ throw err
+ } finally {
+ timer.done()
+ }
+}
+
+/**
+ * @param {string} projectId
+ * @param {string} userId
+ * @param {string} editorId
+ * @param {string} buildId
+ * @param {string} outputDir
+ * @return {Promise<boolean>}
+ */
+async function downloadOutputDotSynctexFromCompileCache(
+ projectId,
+ userId,
+ editorId,
+ buildId,
+ outputDir
+) {
+ if (!Settings.apis.clsiCache.enabled) return false
+ if (!OBJECT_ID_REGEX.test(projectId)) return false
+
+ const timer = new Metrics.Timer(
+ 'clsi_cache_download',
+ 1,
+ { method: 'synctex' },
+ TIMING_BUCKETS
+ )
+ let stream
+ try {
+ stream = await fetchStream(
+ `${getShard(projectId).url}/project/${projectId}/${
+ userId ? `user/${userId}/` : ''
+ }build/${editorId}-${buildId}/search/output/output.synctex.gz`,
+ {
+ method: 'GET',
+ signal: AbortSignal.timeout(10_000),
+ }
+ )
+ } catch (err) {
+ if (err instanceof RequestFailedError && err.response.status === 404) {
+ timer.done({ status: 'not-found' })
+ return false
+ }
+ timer.done({ status: 'error' })
+ throw err
+ }
+ await fs.promises.mkdir(outputDir, { recursive: true })
+ const dst = Path.join(outputDir, 'output.synctex.gz')
+ const tmp = dst + crypto.randomUUID()
+ try {
+ await pipeline(
+ stream,
+ new MeteredStream(Metrics, 'clsi_cache_egress', {
+ path: 'output.synctex.gz',
+ }),
+ fs.createWriteStream(tmp)
+ )
+ await fs.promises.rename(tmp, dst)
+ } catch (err) {
+ try {
+ await fs.promises.unlink(tmp)
+ } catch {}
+ throw err
+ }
+ timer.done({ status: 'success' })
+ return true
+}
+
+/**
+ * @param {string} projectId
+ * @param {string} userId
+ * @param {string} compileDir
+ * @return {Promise<boolean>}
+ */
+async function downloadLatestCompileCache(projectId, userId, compileDir) {
+ if (!Settings.apis.clsiCache.enabled) return false
+ if (!OBJECT_ID_REGEX.test(projectId)) return false
+
+ const url = `${getShard(projectId).url}/project/${projectId}/${
+ userId ? `user/${userId}/` : ''
+ }latest/output/output.tar.gz`
+ const timer = new Metrics.Timer(
+ 'clsi_cache_download',
+ 1,
+ { method: 'tar' },
+ TIMING_BUCKETS
+ )
+ let stream
+ try {
+ stream = await fetchStream(url, {
+ method: 'GET',
+ signal: AbortSignal.timeout(10_000),
+ })
+ } catch (err) {
+ if (err instanceof RequestFailedError && err.response.status === 404) {
+ timer.done({ status: 'not-found' })
+ return false
+ }
+ timer.done({ status: 'error' })
+ throw err
+ }
+ let n = 0
+ let abort = false
+ await pipeline(
+ stream,
+ new MeteredStream(Metrics, 'clsi_cache_egress', { path: 'output.tar.gz' }),
+ createGunzip(),
+ tarFs.extract(compileDir, {
+ // use ignore hook for counting entries (files+folders) and validation.
+ // Include folders as they incur mkdir calls.
+ ignore(_, header) {
+ if (abort) return true // log once
+ n++
+ if (n > MAX_ENTRIES_IN_OUTPUT_TAR) {
+ abort = true
+ logger.warn(
+ {
+ url,
+ compileDir,
+ },
+ 'too many entries in tar-ball from clsi-cache'
+ )
+ } else if (header.type !== 'file' && header.type !== 'directory') {
+ abort = true
+ logger.warn(
+ {
+ url,
+ compileDir,
+ entryType: header.type,
+ },
+ 'unexpected entry in tar-ball from clsi-cache'
+ )
+ }
+ return abort
+ },
+ })
+ )
+ Metrics.count('clsi_cache_download_entries', n)
+ timer.done({ status: 'success' })
+ return !abort
+}
+
+module.exports = {
+ notifyCLSICacheAboutBuild,
+ downloadLatestCompileCache,
+ downloadOutputDotSynctexFromCompileCache,
+}
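
> As a usage illustration of `getShard` above, a standalone sketch of the shard-selection rule; the shard list and project id below are invented for the example:

```js
// Sketch: clsi-cache shard selection from the last 4 bytes of a Mongo ObjectId.
function pickShard(projectId, shards) {
  const last4Bytes = Buffer.from(projectId, 'hex').subarray(8, 12)
  return shards[last4Bytes.readUInt32BE() % shards.length]
}

// Hypothetical shard list, matching the { shard, url } shape used above.
const shards = [
  { shard: 'cache-a', url: 'http://clsi-cache-a:3044' },
  { shard: 'cache-b', url: 'http://clsi-cache-b:3044' },
]
console.log(pickShard('507f1f77bcf86cd799439011', shards))
```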
diff --git a/services/clsi/app/js/CompileController.js b/services/clsi/app/js/CompileController.js
index 3da884eed7..b3343ee233 100644
--- a/services/clsi/app/js/CompileController.js
+++ b/services/clsi/app/js/CompileController.js
@@ -1,3 +1,4 @@
+const Path = require('node:path')
const RequestParser = require('./RequestParser')
const CompileManager = require('./CompileManager')
const Settings = require('@overleaf/settings')
@@ -5,6 +6,7 @@ const Metrics = require('./Metrics')
const ProjectPersistenceManager = require('./ProjectPersistenceManager')
const logger = require('@overleaf/logger')
const Errors = require('./Errors')
+const { notifyCLSICacheAboutBuild } = require('./CLSICacheHandler')
let lastSuccessfulCompileTimestamp = 0
@@ -29,101 +31,135 @@ function compile(req, res, next) {
if (error) {
return next(error)
}
- CompileManager.doCompileWithLock(request, (error, result) => {
- let { buildId, outputFiles, stats, timings } = result || {}
- let code, status
- if (outputFiles == null) {
- outputFiles = []
- }
- if (error instanceof Errors.AlreadyCompilingError) {
- code = 423 // Http 423 Locked
- status = 'compile-in-progress'
- } else if (error instanceof Errors.FilesOutOfSyncError) {
- code = 409 // Http 409 Conflict
- status = 'retry'
- logger.warn(
- {
+ const stats = {}
+ const timings = {}
+ CompileManager.doCompileWithLock(
+ request,
+ stats,
+ timings,
+ (error, result) => {
+ let { buildId, outputFiles } = result || {}
+ let code, status
+ if (outputFiles == null) {
+ outputFiles = []
+ }
+ if (error instanceof Errors.AlreadyCompilingError) {
+ code = 423 // Http 423 Locked
+ status = 'compile-in-progress'
+ } else if (error instanceof Errors.FilesOutOfSyncError) {
+ code = 409 // Http 409 Conflict
+ status = 'retry'
+ logger.warn(
+ {
+ projectId: request.project_id,
+ userId: request.user_id,
+ },
+ 'files out of sync, please retry'
+ )
+ } else if (
+ error?.code === 'EPIPE' ||
+ error instanceof Errors.TooManyCompileRequestsError
+ ) {
+ // docker returns EPIPE when shutting down
+ code = 503 // send 503 Unavailable response
+ status = 'unavailable'
+ } else if (error?.terminated) {
+ status = 'terminated'
+ } else if (error?.validate) {
+ status = `validation-${error.validate}`
+ } else if (error?.timedout) {
+ status = 'timedout'
+ logger.debug(
+ { err: error, projectId: request.project_id },
+ 'timeout running compile'
+ )
+ } else if (error) {
+ status = 'error'
+ code = 500
+ logger.error(
+ { err: error, projectId: request.project_id },
+ 'error running compile'
+ )
+ } else {
+ if (
+ outputFiles.some(
+ file => file.path === 'output.pdf' && file.size > 0
+ )
+ ) {
+ status = 'success'
+ lastSuccessfulCompileTimestamp = Date.now()
+ } else if (request.stopOnFirstError) {
+ status = 'stopped-on-first-error'
+ } else {
+ status = 'failure'
+ logger.warn(
+ { projectId: request.project_id, outputFiles },
+ 'project failed to compile successfully, no output.pdf generated'
+ )
+ }
+
+ // log an error if any core files are found
+ if (outputFiles.some(file => file.path === 'core')) {
+ logger.error(
+ { projectId: request.project_id, req, outputFiles },
+ 'core file found in output'
+ )
+ }
+ }
+
+ if (error) {
+ outputFiles = error.outputFiles || []
+ buildId = error.buildId
+ }
+
+ let clsiCacheShard
+ if (
+ status === 'success' &&
+ request.editorId &&
+ request.populateClsiCache
+ ) {
+ clsiCacheShard = notifyCLSICacheAboutBuild({
projectId: request.project_id,
userId: request.user_id,
+ buildId: outputFiles[0].build,
+ editorId: request.editorId,
+ outputFiles,
+ compileGroup: request.compileGroup,
+ options: {
+ compiler: request.compiler,
+ draft: request.draft,
+ imageName: request.imageName
+ ? request.imageName
+ : undefined,
+ rootResourcePath: request.rootResourcePath,
+ stopOnFirstError: request.stopOnFirstError,
+ },
+ })
+ }
+
+ timer.done()
+ res.status(code || 200).send({
+ compile: {
+ status,
+ error: error?.message || error,
+ stats,
+ timings,
+ buildId,
+ clsiCacheShard,
+ outputUrlPrefix: Settings.apis.clsi.outputUrlPrefix,
+ outputFiles: outputFiles.map(file => ({
+ url:
+ `${Settings.apis.clsi.url}/project/${request.project_id}` +
+ (request.user_id != null
+ ? `/user/${request.user_id}`
+ : '') +
+ `/build/${file.build}/output/${file.path}`,
+ ...file,
+ })),
},
- 'files out of sync, please retry'
- )
- } else if (
- error?.code === 'EPIPE' ||
- error instanceof Errors.TooManyCompileRequestsError
- ) {
- // docker returns EPIPE when shutting down
- code = 503 // send 503 Unavailable response
- status = 'unavailable'
- } else if (error?.terminated) {
- status = 'terminated'
- } else if (error?.validate) {
- status = `validation-${error.validate}`
- } else if (error?.timedout) {
- status = 'timedout'
- logger.debug(
- { err: error, projectId: request.project_id },
- 'timeout running compile'
- )
- } else if (error) {
- status = 'error'
- code = 500
- logger.error(
- { err: error, projectId: request.project_id },
- 'error running compile'
- )
- } else {
- if (
- outputFiles.some(
- file => file.path === 'output.pdf' && file.size > 0
- )
- ) {
- status = 'success'
- lastSuccessfulCompileTimestamp = Date.now()
- } else if (request.stopOnFirstError) {
- status = 'stopped-on-first-error'
- } else {
- status = 'failure'
- logger.warn(
- { projectId: request.project_id, outputFiles },
- 'project failed to compile successfully, no output.pdf generated'
- )
- }
-
- // log an error if any core files are found
- if (outputFiles.some(file => file.path === 'core')) {
- logger.error(
- { projectId: request.project_id, req, outputFiles },
- 'core file found in output'
- )
- }
+ })
}
-
- if (error) {
- outputFiles = error.outputFiles || []
- buildId = error.buildId
- }
-
- timer.done()
- res.status(code || 200).send({
- compile: {
- status,
- error: error?.message || error,
- stats,
- timings,
- buildId,
- outputUrlPrefix: Settings.apis.clsi.outputUrlPrefix,
- outputFiles: outputFiles.map(file => ({
- url:
- `${Settings.apis.clsi.url}/project/${request.project_id}` +
- (request.user_id != null ? `/user/${request.user_id}` : '') +
- (file.build != null ? `/build/${file.build}` : '') +
- `/output/${file.path}`,
- ...file,
- })),
- },
- })
- })
+ )
}
)
})
@@ -154,30 +190,27 @@ function clearCache(req, res, next) {
}
function syncFromCode(req, res, next) {
- const { file } = req.query
+ const { file, editorId, buildId } = req.query
+ const compileFromClsiCache = req.query.compileFromClsiCache === 'true'
const line = parseInt(req.query.line, 10)
const column = parseInt(req.query.column, 10)
const { imageName } = req.query
const projectId = req.params.project_id
const userId = req.params.user_id
-
- if (imageName && !_isImageNameAllowed(imageName)) {
- return res.status(400).send('invalid image')
- }
-
CompileManager.syncFromCode(
projectId,
userId,
file,
line,
column,
- imageName,
- function (error, pdfPositions) {
+ { imageName, editorId, buildId, compileFromClsiCache },
+ function (error, pdfPositions, downloadedFromCache) {
if (error) {
return next(error)
}
res.json({
pdf: pdfPositions,
+ downloadedFromCache,
})
}
)
@@ -187,26 +220,24 @@ function syncFromPdf(req, res, next) {
const page = parseInt(req.query.page, 10)
const h = parseFloat(req.query.h)
const v = parseFloat(req.query.v)
- const { imageName } = req.query
+ const { imageName, editorId, buildId } = req.query
+ const compileFromClsiCache = req.query.compileFromClsiCache === 'true'
const projectId = req.params.project_id
const userId = req.params.user_id
-
- if (imageName && !_isImageNameAllowed(imageName)) {
- return res.status(400).send('invalid image')
- }
CompileManager.syncFromPdf(
projectId,
userId,
page,
h,
v,
- imageName,
- function (error, codePositions) {
+ { imageName, editorId, buildId, compileFromClsiCache },
+ function (error, codePositions, downloadedFromCache) {
if (error) {
return next(error)
}
res.json({
code: codePositions,
+ downloadedFromCache,
})
}
)
@@ -217,9 +248,6 @@ function wordcount(req, res, next) {
const projectId = req.params.project_id
const userId = req.params.user_id
const { image } = req.query
- if (image && !_isImageNameAllowed(image)) {
- return res.status(400).send('invalid image')
- }
logger.debug({ image, file, projectId }, 'word count request')
CompileManager.wordcount(
@@ -242,12 +270,6 @@ function status(req, res, next) {
res.send('OK')
}
-function _isImageNameAllowed(imageName) {
- const ALLOWED_IMAGES =
- Settings.clsi && Settings.clsi.docker && Settings.clsi.docker.allowedImages
- return !ALLOWED_IMAGES || ALLOWED_IMAGES.includes(imageName)
-}
-
module.exports = {
compile,
stopCompile,
diff --git a/services/clsi/app/js/CompileManager.js b/services/clsi/app/js/CompileManager.js
index 292be201f7..1b66927412 100644
--- a/services/clsi/app/js/CompileManager.js
+++ b/services/clsi/app/js/CompileManager.js
@@ -19,6 +19,11 @@ const Errors = require('./Errors')
const CommandRunner = require('./CommandRunner')
const { emitPdfStats } = require('./ContentCacheMetrics')
const SynctexOutputParser = require('./SynctexOutputParser')
+const {
+ downloadLatestCompileCache,
+ downloadOutputDotSynctexFromCompileCache,
+} = require('./CLSICacheHandler')
+const { callbackifyMultiResult } = require('@overleaf/promise-utils')
const COMPILE_TIME_BUCKETS = [
// NOTE: These buckets are locked in per metric name.
@@ -42,22 +47,22 @@ function getOutputDir(projectId, userId) {
return Path.join(Settings.path.outputDir, getCompileName(projectId, userId))
}
-async function doCompileWithLock(request) {
+async function doCompileWithLock(request, stats, timings) {
const compileDir = getCompileDir(request.project_id, request.user_id)
- await fsPromises.mkdir(compileDir, { recursive: true })
+ request.isInitialCompile =
+ (await fsPromises.mkdir(compileDir, { recursive: true })) === compileDir
// prevent simultaneous compiles
const lock = LockManager.acquire(compileDir)
try {
- return await doCompile(request)
+ return await doCompile(request, stats, timings)
} finally {
lock.release()
}
}
-async function doCompile(request) {
+async function doCompile(request, stats, timings) {
+ const { project_id: projectId, user_id: userId } = request
const compileDir = getCompileDir(request.project_id, request.user_id)
- const stats = {}
- const timings = {}
const timerE2E = new Metrics.Timer(
'compile-e2e-v2',
@@ -65,6 +70,25 @@ async function doCompile(request) {
request.metricsOpts,
COMPILE_TIME_BUCKETS
)
+ if (request.isInitialCompile) {
+ stats.isInitialCompile = 1
+ request.metricsOpts.compile = 'initial'
+ if (request.compileFromClsiCache) {
+ try {
+ if (await downloadLatestCompileCache(projectId, userId, compileDir)) {
+ stats.restoredClsiCache = 1
+ request.metricsOpts.compile = 'from-clsi-cache'
+ }
+ } catch (err) {
+ logger.warn(
+ { err, projectId, userId },
+ 'failed to populate compile dir from cache'
+ )
+ }
+ }
+ } else {
+ request.metricsOpts.compile = 'recompile'
+ }
const writeToDiskTimer = new Metrics.Timer(
'write-to-disk',
1,
@@ -296,7 +320,7 @@ async function doCompile(request) {
emitPdfStats(stats, timings, request)
}
- return { outputFiles, stats, timings, buildId }
+ return { outputFiles, buildId }
}
async function _saveOutputFiles({
@@ -313,24 +337,16 @@ async function _saveOutputFiles({
)
const outputDir = getOutputDir(request.project_id, request.user_id)
- let { outputFiles, allEntries } =
+ const { outputFiles: rawOutputFiles, allEntries } =
await OutputFileFinder.promises.findOutputFiles(resourceList, compileDir)
- let buildId
-
- try {
- const saveResult = await OutputCacheManager.promises.saveOutputFiles(
+ const { buildId, outputFiles } =
+ await OutputCacheManager.promises.saveOutputFiles(
{ request, stats, timings },
- outputFiles,
+ rawOutputFiles,
compileDir,
outputDir
)
- buildId = saveResult.buildId
- outputFiles = saveResult.outputFiles
- } catch (err) {
- const { project_id: projectId, user_id: userId } = request
- logger.err({ projectId, userId, err }, 'failed to save output files')
- }
timings.output = timer.done()
return { outputFiles, allEntries, buildId }
@@ -416,14 +432,7 @@ async function _checkDirectory(compileDir) {
return true
}
-async function syncFromCode(
- projectId,
- userId,
- filename,
- line,
- column,
- imageName
-) {
+async function syncFromCode(projectId, userId, filename, line, column, opts) {
// If LaTeX was run in a virtual environment, the file path that synctex expects
// might not match the file path on the host. The .synctex.gz file however, will be accessed
// wherever it is on the host.
@@ -439,15 +448,23 @@ async function syncFromCode(
'-o',
outputFilePath,
]
- const stdout = await _runSynctex(projectId, userId, command, imageName)
+ const { stdout, downloadedFromCache } = await _runSynctex(
+ projectId,
+ userId,
+ command,
+ opts
+ )
logger.debug(
{ projectId, userId, filename, line, column, command, stdout },
'synctex code output'
)
- return SynctexOutputParser.parseViewOutput(stdout)
+ return {
+ codePositions: SynctexOutputParser.parseViewOutput(stdout),
+ downloadedFromCache,
+ }
}
-async function syncFromPdf(projectId, userId, page, h, v, imageName) {
+async function syncFromPdf(projectId, userId, page, h, v, opts) {
const compileName = getCompileName(projectId, userId)
const baseDir = Settings.path.synctexBaseDir(compileName)
const outputFilePath = `${baseDir}/output.pdf`
@@ -457,9 +474,17 @@ async function syncFromPdf(projectId, userId, page, h, v, imageName) {
'-o',
`${page}:${h}:${v}:${outputFilePath}`,
]
- const stdout = await _runSynctex(projectId, userId, command, imageName)
+ const { stdout, downloadedFromCache } = await _runSynctex(
+ projectId,
+ userId,
+ command,
+ opts
+ )
logger.debug({ projectId, userId, page, h, v, stdout }, 'synctex pdf output')
- return SynctexOutputParser.parseEditOutput(stdout, baseDir)
+ return {
+ pdfPositions: SynctexOutputParser.parseEditOutput(stdout, baseDir),
+ downloadedFromCache,
+ }
}
async function _checkFileExists(dir, filename) {
@@ -486,32 +511,90 @@ async function _checkFileExists(dir, filename) {
}
}
-async function _runSynctex(projectId, userId, command, imageName) {
- const directory = getCompileDir(projectId, userId)
+async function _runSynctex(projectId, userId, command, opts) {
+ const { imageName, editorId, buildId, compileFromClsiCache } = opts
+
+ if (imageName && !_isImageNameAllowed(imageName)) {
+ throw new Errors.InvalidParameter('invalid image')
+ }
+ if (editorId && !/^[a-f0-9-]+$/.test(editorId)) {
+ throw new Errors.InvalidParameter('invalid editorId')
+ }
+ if (buildId && !OutputCacheManager.BUILD_REGEX.test(buildId)) {
+ throw new Errors.InvalidParameter('invalid buildId')
+ }
+
+ const outputDir = getOutputDir(projectId, userId)
+ const runInOutputDir = buildId && CommandRunner.canRunSyncTeXInOutputDir()
+
+ const directory = runInOutputDir
+ ? Path.join(outputDir, OutputCacheManager.CACHE_SUBDIR, buildId)
+ : getCompileDir(projectId, userId)
const timeout = 60 * 1000 // increased to allow for large projects
const compileName = getCompileName(projectId, userId)
- const compileGroup = 'synctex'
+ const compileGroup = runInOutputDir ? 'synctex-output' : 'synctex'
const defaultImageName =
Settings.clsi && Settings.clsi.docker && Settings.clsi.docker.image
- await _checkFileExists(directory, 'output.synctex.gz')
- try {
- const output = await CommandRunner.promises.run(
- compileName,
- command,
- directory,
- imageName || defaultImageName,
- timeout,
- {},
- compileGroup
- )
- return output.stdout
- } catch (error) {
- throw OError.tag(error, 'error running synctex', {
- command,
- projectId,
- userId,
- })
- }
+ // eslint-disable-next-line @typescript-eslint/return-await
+ return await OutputCacheManager.promises.queueDirOperation(
+ outputDir,
+ /**
+ * @return {Promise<{stdout: string, downloadedFromCache: boolean}>}
+ */
+ async () => {
+ let downloadedFromCache = false
+ try {
+ await _checkFileExists(directory, 'output.synctex.gz')
+ } catch (err) {
+ if (
+ err instanceof Errors.NotFoundError &&
+ compileFromClsiCache &&
+ editorId &&
+ buildId
+ ) {
+ try {
+ downloadedFromCache =
+ await downloadOutputDotSynctexFromCompileCache(
+ projectId,
+ userId,
+ editorId,
+ buildId,
+ directory
+ )
+ } catch (err) {
+ logger.warn(
+ { err, projectId, userId, editorId, buildId },
+ 'failed to download output.synctex.gz from clsi-cache'
+ )
+ }
+ await _checkFileExists(directory, 'output.synctex.gz')
+ } else {
+ throw err
+ }
+ }
+ try {
+ const { stdout } = await CommandRunner.promises.run(
+ compileName,
+ command,
+ directory,
+ imageName || defaultImageName,
+ timeout,
+ {},
+ compileGroup
+ )
+ return {
+ stdout,
+ downloadedFromCache,
+ }
+ } catch (error) {
+ throw OError.tag(error, 'error running synctex', {
+ command,
+ projectId,
+ userId,
+ })
+ }
+ }
+ )
}
async function wordcount(projectId, userId, filename, image) {
@@ -523,6 +606,10 @@ async function wordcount(projectId, userId, filename, image) {
const compileName = getCompileName(projectId, userId)
const compileGroup = 'wordcount'
+ if (image && !_isImageNameAllowed(image)) {
+ throw new Errors.InvalidParameter('invalid image')
+ }
+
try {
await fsPromises.mkdir(compileDir, { recursive: true })
} catch (err) {
@@ -610,13 +697,25 @@ function _parseWordcountFromOutput(output) {
return results
}
+function _isImageNameAllowed(imageName) {
+ const ALLOWED_IMAGES =
+ Settings.clsi && Settings.clsi.docker && Settings.clsi.docker.allowedImages
+ return !ALLOWED_IMAGES || ALLOWED_IMAGES.includes(imageName)
+}
+
module.exports = {
doCompileWithLock: callbackify(doCompileWithLock),
stopCompile: callbackify(stopCompile),
clearProject: callbackify(clearProject),
clearExpiredProjects: callbackify(clearExpiredProjects),
- syncFromCode: callbackify(syncFromCode),
- syncFromPdf: callbackify(syncFromPdf),
+ syncFromCode: callbackifyMultiResult(syncFromCode, [
+ 'codePositions',
+ 'downloadedFromCache',
+ ]),
+ syncFromPdf: callbackifyMultiResult(syncFromPdf, [
+ 'pdfPositions',
+ 'downloadedFromCache',
+ ]),
wordcount: callbackify(wordcount),
promises: {
doCompileWithLock,
diff --git a/services/clsi/app/js/DockerRunner.js b/services/clsi/app/js/DockerRunner.js
index 7aac613db4..97053c1875 100644
--- a/services/clsi/app/js/DockerRunner.js
+++ b/services/clsi/app/js/DockerRunner.js
@@ -6,21 +6,12 @@ const dockerode = new Docker()
const crypto = require('node:crypto')
const async = require('async')
const LockManager = require('./DockerLockManager')
-const fs = require('node:fs')
const Path = require('node:path')
const _ = require('lodash')
const ONE_HOUR_IN_MS = 60 * 60 * 1000
logger.debug('using docker runner')
-function usingSiblingContainers() {
- return (
- Settings != null &&
- Settings.path != null &&
- Settings.path.sandboxedCompilesHostDir != null
- )
-}
-
let containerMonitorTimeout
let containerMonitorInterval
@@ -35,24 +26,6 @@ const DockerRunner = {
compileGroup,
callback
) {
- if (usingSiblingContainers()) {
- const _newPath = Settings.path.sandboxedCompilesHostDir
- logger.debug(
- { path: _newPath },
- 'altering bind path for sibling containers'
- )
- // Server Pro, example:
- // '/var/lib/overleaf/data/compiles/'
- // ... becomes ...
- // '/opt/overleaf_data/data/compiles/'
- directory = Path.join(
- Settings.path.sandboxedCompilesHostDir,
- Path.basename(directory)
- )
- }
-
- const volumes = { [directory]: '/compile' }
-
command = command.map(arg =>
arg.toString().replace('$COMPILE_DIR', '/compile')
)
@@ -72,7 +45,32 @@ const DockerRunner = {
image = `${Settings.texliveImageNameOveride}/${img[2]}`
}
- if (compileGroup === 'synctex' || compileGroup === 'wordcount') {
+ if (compileGroup === 'synctex-output') {
+ // In: directory = '/overleaf/services/clsi/output/projectId-userId/generated-files/buildId'
+ // directory.split('/').slice(-3) === 'projectId-userId/generated-files/buildId'
+ // sandboxedCompilesHostDirOutput = '/host/output'
+ // Out: directory = '/host/output/projectId-userId/generated-files/buildId'
+ directory = Path.join(
+ Settings.path.sandboxedCompilesHostDirOutput,
+ ...directory.split('/').slice(-3)
+ )
+ } else {
+ // In: directory = '/overleaf/services/clsi/compiles/projectId-userId'
+ // Path.basename(directory) === 'projectId-userId'
+ // sandboxedCompilesHostDirCompiles = '/host/compiles'
+ // Out: directory = '/host/compiles/projectId-userId'
+ directory = Path.join(
+ Settings.path.sandboxedCompilesHostDirCompiles,
+ Path.basename(directory)
+ )
+ }
+
+ const volumes = { [directory]: '/compile' }
+ if (
+ compileGroup === 'synctex' ||
+ compileGroup === 'synctex-output' ||
+ compileGroup === 'wordcount'
+ ) {
volumes[directory] += ':ro'
}
@@ -234,8 +232,8 @@ const DockerRunner = {
}
}
// set the path based on the image year
- const match = image.match(/:([0-9]+)\.[0-9]+/)
- const year = match ? match[1] : '2014'
+ const match = image.match(/:([0-9]+)\.[0-9]+|:TL([0-9]+)/)
+ const year = match ? match[1] || match[2] : '2014'
env.PATH = `/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/texlive/${year}/bin/x86_64-linux/`
const options = {
Cmd: command,
@@ -309,50 +307,17 @@ const DockerRunner = {
LockManager.runWithLock(
options.name,
releaseLock =>
- // Check that volumes exist before starting the container.
- // When a container is started with volume pointing to a
- // non-existent directory then docker creates the directory but
- // with root ownership.
- DockerRunner._checkVolumes(options, volumes, err => {
- if (err != null) {
- return releaseLock(err)
- }
- DockerRunner._startContainer(
- options,
- volumes,
- attachStreamHandler,
- releaseLock
- )
- }),
-
+ DockerRunner._startContainer(
+ options,
+ volumes,
+ attachStreamHandler,
+ releaseLock
+ ),
callback
)
},
// Check that volumes exist and are directories
- _checkVolumes(options, volumes, callback) {
- if (usingSiblingContainers()) {
- // Server Pro, with sibling-containers active, skip checks
- return callback(null)
- }
-
- const checkVolume = (path, cb) =>
- fs.stat(path, (err, stats) => {
- if (err != null) {
- return cb(err)
- }
- if (!stats.isDirectory()) {
- return cb(new Error('not a directory'))
- }
- cb()
- })
- const jobs = []
- for (const vol in volumes) {
- jobs.push(cb => checkVolume(vol, cb))
- }
- async.series(jobs, callback)
- },
-
_startContainer(options, volumes, attachStreamHandler, callback) {
callback = _.once(callback)
const { name } = options
@@ -617,6 +582,10 @@ const DockerRunner = {
containerMonitorInterval = undefined
}
},
+
+ canRunSyncTeXInOutputDir() {
+ return Boolean(Settings.path.sandboxedCompilesHostDirOutput)
+ },
}
DockerRunner.startContainerMonitor()
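
> The bind-path rewrite described by the inline comments above can be shown in isolation; the `/host/...` paths are the example values from those comments, not real mounts:

```js
const Path = require('node:path')

// Sketch of the sibling-container bind-path rewrite in DockerRunner.run above.
function hostBindPath(directory, compileGroup, paths) {
  if (compileGroup === 'synctex-output') {
    // .../output/projectId-userId/generated-files/buildId -> keep last 3 segments
    return Path.join(
      paths.sandboxedCompilesHostDirOutput,
      ...directory.split('/').slice(-3)
    )
  }
  // .../compiles/projectId-userId -> keep the last segment only
  return Path.join(
    paths.sandboxedCompilesHostDirCompiles,
    Path.basename(directory)
  )
}

console.log(
  hostBindPath(
    '/overleaf/services/clsi/output/projectId-userId/generated-files/buildId',
    'synctex-output',
    {
      sandboxedCompilesHostDirOutput: '/host/output',
      sandboxedCompilesHostDirCompiles: '/host/compiles',
    }
  )
) // => '/host/output/projectId-userId/generated-files/buildId'
```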
diff --git a/services/clsi/app/js/Errors.js b/services/clsi/app/js/Errors.js
index 5c5fd3745a..64c3c7b59a 100644
--- a/services/clsi/app/js/Errors.js
+++ b/services/clsi/app/js/Errors.js
@@ -35,6 +35,7 @@ class QueueLimitReachedError extends OError {}
class TimedOutError extends OError {}
class NoXrefTableError extends OError {}
class TooManyCompileRequestsError extends OError {}
+class InvalidParameter extends OError {}
module.exports = Errors = {
QueueLimitReachedError,
@@ -44,4 +45,5 @@ module.exports = Errors = {
AlreadyCompilingError,
NoXrefTableError,
TooManyCompileRequestsError,
+ InvalidParameter,
}
diff --git a/services/clsi/app/js/LocalCommandRunner.js b/services/clsi/app/js/LocalCommandRunner.js
index bac7d39400..aa62825443 100644
--- a/services/clsi/app/js/LocalCommandRunner.js
+++ b/services/clsi/app/js/LocalCommandRunner.js
@@ -54,6 +54,7 @@ module.exports = CommandRunner = {
cwd: directory,
env,
stdio: ['pipe', 'pipe', 'ignore'],
+ detached: true,
})
let stdout = ''
@@ -99,6 +100,10 @@ module.exports = CommandRunner = {
}
return callback()
},
+
+ canRunSyncTeXInOutputDir() {
+ return true
+ },
}
module.exports.promises = {
diff --git a/services/clsi/app/js/OutputCacheManager.js b/services/clsi/app/js/OutputCacheManager.js
index 1cad933c2d..a1a0a89aa7 100644
--- a/services/clsi/app/js/OutputCacheManager.js
+++ b/services/clsi/app/js/OutputCacheManager.js
@@ -83,6 +83,13 @@ async function cleanupDirectory(dir, options) {
})
}
+/**
+ * @template T
+ *
+ * @param {string} dir
+ * @param {() => Promise<T>} fn
+ * @return {Promise<T>}
+ */
async function queueDirOperation(dir, fn) {
const pending = PENDING_PROJECT_ACTIONS.get(dir) || Promise.resolve()
const p = pending.then(fn, fn).finally(() => {
@@ -98,12 +105,11 @@ module.exports = OutputCacheManager = {
CONTENT_SUBDIR: 'content',
CACHE_SUBDIR: 'generated-files',
ARCHIVE_SUBDIR: 'archived-logs',
- // build id is HEXDATE-HEXRANDOM from Date.now()and RandomBytes
- // for backwards compatibility, make the randombytes part optional
- BUILD_REGEX: /^[0-9a-f]+(-[0-9a-f]+)?$/,
- CONTENT_REGEX: /^[0-9a-f]+(-[0-9a-f]+)?$/,
+ // build id is HEXDATE-HEXRANDOM from Date.now() and RandomBytes
+ BUILD_REGEX: /^[0-9a-f]+-[0-9a-f]+$/,
+ CONTENT_REGEX: /^[0-9a-f]+-[0-9a-f]+$/,
CACHE_LIMIT: 2, // maximum number of cache directories
- CACHE_AGE: 60 * 60 * 1000, // up to one hour old
+ CACHE_AGE: 90 * 60 * 1000, // up to 90 minutes old
init,
queueDirOperation: callbackify(queueDirOperation),
@@ -137,7 +143,11 @@ module.exports = OutputCacheManager = {
outputDir,
callback
) {
- OutputCacheManager.generateBuildId(function (err, buildId) {
+ const getBuildId = cb => {
+ if (request.buildId) return cb(null, request.buildId)
+ OutputCacheManager.generateBuildId(cb)
+ }
+ getBuildId(function (err, buildId) {
if (err) {
return callback(err)
}
@@ -245,7 +255,7 @@ module.exports = OutputCacheManager = {
{ err, directory: cacheDir },
'error creating cache directory'
)
- callback(err, outputFiles)
+ callback(err)
} else {
// copy all the output files into the new cache directory
const results = []
@@ -263,7 +273,6 @@ module.exports = OutputCacheManager = {
return cb()
}
// copy other files into cache directory if valid
- const newFile = _.clone(file)
const src = Path.join(compileDir, file.path)
const dst = Path.join(cacheDir, file.path)
OutputCacheManager._checkIfShouldCopy(
@@ -279,8 +288,8 @@ module.exports = OutputCacheManager = {
if (err) {
return cb(err)
}
- newFile.build = buildId // attach a build id if we cached the file
- results.push(newFile)
+ file.build = buildId
+ results.push(file)
cb()
})
}
@@ -288,8 +297,7 @@ module.exports = OutputCacheManager = {
},
function (err) {
if (err) {
- // pass back the original files if we encountered *any* error
- callback(err, outputFiles)
+ callback(err)
// clean up the directory we just created
fs.rm(cacheDir, { force: true, recursive: true }, function (err) {
if (err) {
@@ -301,7 +309,7 @@ module.exports = OutputCacheManager = {
})
} else {
// pass back the list of new files in the cache
- callback(err, results)
+ callback(null, results)
// let file expiry run in the background, expire all previous files if per-user
cleanupDirectory(outputDir, {
keep: buildId,
@@ -676,4 +684,5 @@ OutputCacheManager.promises = {
saveOutputFilesInBuildDir: promisify(
OutputCacheManager.saveOutputFilesInBuildDir
),
+ queueDirOperation,
}
diff --git a/services/clsi/app/js/OutputFileArchiveManager.js b/services/clsi/app/js/OutputFileArchiveManager.js
index 3c5a6c8197..64c5198392 100644
--- a/services/clsi/app/js/OutputFileArchiveManager.js
+++ b/services/clsi/app/js/OutputFileArchiveManager.js
@@ -7,7 +7,7 @@ const { NotFoundError } = require('./Errors')
const logger = require('@overleaf/logger')
// NOTE: Updating this list requires a corresponding change in
-// * services/web/frontend/js/features/pdf-preview/util/file-list.js
+// * services/web/frontend/js/features/pdf-preview/util/file-list.ts
const ignoreFiles = ['output.fls', 'output.fdb_latexmk']
function getContentDir(projectId, userId) {
@@ -93,8 +93,11 @@ module.exports = {
)
return outputFiles.filter(
- // Ignore the pdf and also ignore the files ignored by the frontend.
- ({ path }) => path !== 'output.pdf' && !ignoreFiles.includes(path)
+ // Ignore the pdf, clsi-cache tar-ball and also ignore the files ignored by the frontend.
+ ({ path }) =>
+ path !== 'output.pdf' &&
+ path !== 'output.tar.gz' &&
+ !ignoreFiles.includes(path)
)
} catch (error) {
if (
diff --git a/services/clsi/app/js/OutputFileOptimiser.js b/services/clsi/app/js/OutputFileOptimiser.js
index 0548defc21..09ca98672d 100644
--- a/services/clsi/app/js/OutputFileOptimiser.js
+++ b/services/clsi/app/js/OutputFileOptimiser.js
@@ -74,9 +74,7 @@ module.exports = OutputFileOptimiser = {
logger.debug({ args }, 'running qpdf command')
const timer = new Metrics.Timer('qpdf')
- const proc = spawn('qpdf', args)
- let stdout = ''
- proc.stdout.setEncoding('utf8').on('data', chunk => (stdout += chunk))
+ const proc = spawn('qpdf', args, { stdio: 'ignore' })
callback = _.once(callback) // avoid double call back for error and close event
proc.on('error', function (err) {
logger.warn({ err, args }, 'qpdf failed')
diff --git a/services/clsi/app/js/ProjectPersistenceManager.js b/services/clsi/app/js/ProjectPersistenceManager.js
index 66c6be5108..41cdd07f4d 100644
--- a/services/clsi/app/js/ProjectPersistenceManager.js
+++ b/services/clsi/app/js/ProjectPersistenceManager.js
@@ -13,8 +13,8 @@ const CompileManager = require('./CompileManager')
const async = require('async')
const logger = require('@overleaf/logger')
const oneDay = 24 * 60 * 60 * 1000
+const Metrics = require('@overleaf/metrics')
const Settings = require('@overleaf/settings')
-const diskusage = require('diskusage')
const { callbackify } = require('node:util')
const Path = require('node:path')
const fs = require('node:fs')
@@ -22,38 +22,81 @@ const fs = require('node:fs')
// projectId -> timestamp mapping.
const LAST_ACCESS = new Map()
-async function refreshExpiryTimeout() {
+let ANY_DISK_LOW = false
+let ANY_DISK_CRITICAL_LOW = false
+
+async function collectDiskStats() {
const paths = [
Settings.path.compilesDir,
Settings.path.outputDir,
Settings.path.clsiCacheDir,
]
+
+ const diskStats = {}
+ let anyDiskLow = false
+ let anyDiskCriticalLow = false
for (const path of paths) {
try {
- const stats = await diskusage.check(path)
- const lowDisk = stats.available / stats.total < 0.1
-
- const lowerExpiry = ProjectPersistenceManager.EXPIRY_TIMEOUT * 0.9
- if (lowDisk && Settings.project_cache_length_ms / 2 < lowerExpiry) {
- logger.warn(
- {
- stats,
- newExpiryTimeoutInDays: (lowerExpiry / oneDay).toFixed(2),
- },
- 'disk running low on space, modifying EXPIRY_TIMEOUT'
- )
- ProjectPersistenceManager.EXPIRY_TIMEOUT = lowerExpiry
- break
+ const { blocks, bavail, bsize } = await fs.promises.statfs(path)
+ const stats = {
+ // Warning: these values will be wrong by a factor in Docker-for-Mac.
+ // See https://github.com/docker/for-mac/issues/2136
+ total: blocks * bsize, // Total size of the file system in bytes
+ available: bavail * bsize, // Free space available to unprivileged users.
}
+ const diskAvailablePercent = (stats.available / stats.total) * 100
+ Metrics.gauge('disk_available_percent', diskAvailablePercent, 1, {
+ path,
+ })
+ const lowDisk = diskAvailablePercent < 10
+ diskStats[path] = { stats, lowDisk }
+
+ const criticalLowDisk = diskAvailablePercent < 3
+ anyDiskLow = anyDiskLow || lowDisk
+ anyDiskCriticalLow = anyDiskCriticalLow || criticalLowDisk
} catch (err) {
logger.err({ err, path }, 'error getting disk usage')
}
}
+ ANY_DISK_LOW = anyDiskLow
+ ANY_DISK_CRITICAL_LOW = anyDiskCriticalLow
+ return diskStats
+}
+
+async function refreshExpiryTimeout() {
+ for (const [path, { stats, lowDisk }] of Object.entries(
+ await collectDiskStats()
+ )) {
+ const lowerExpiry = ProjectPersistenceManager.EXPIRY_TIMEOUT * 0.9
+ if (lowDisk && Settings.project_cache_length_ms / 2 < lowerExpiry) {
+ logger.warn(
+ {
+ path,
+ stats,
+ newExpiryTimeoutInDays: (lowerExpiry / oneDay).toFixed(2),
+ },
+ 'disk running low on space, modifying EXPIRY_TIMEOUT'
+ )
+ ProjectPersistenceManager.EXPIRY_TIMEOUT = lowerExpiry
+ break
+ }
+ }
+ Metrics.gauge(
+ 'project_persistence_expiry_timeout',
+ ProjectPersistenceManager.EXPIRY_TIMEOUT
+ )
}
module.exports = ProjectPersistenceManager = {
EXPIRY_TIMEOUT: Settings.project_cache_length_ms || oneDay * 2.5,
+ isAnyDiskLow() {
+ return ANY_DISK_LOW
+ },
+ isAnyDiskCriticalLow() {
+ return ANY_DISK_CRITICAL_LOW
+ },
+
promises: {
refreshExpiryTimeout,
},
@@ -103,6 +146,13 @@ module.exports = ProjectPersistenceManager = {
}
)
})
+
+ // Collect disk stats frequently to have them ready the next time /metrics is scraped (60s +- jitter) or every 5th scrape of the load agent (3s +- jitter).
+ setInterval(() => {
+ collectDiskStats().catch(err => {
+ logger.err({ err }, 'low level error collecting disk stats')
+ })
+ }, 15_000)
},
markProjectAsJustAccessed(projectId, callback) {
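
> A self-contained sketch of the disk check introduced in `collectDiskStats` above, using the same `statfs` fields and the 10% / 3% thresholds; the path is a placeholder:

```js
const fs = require('node:fs')

// Sketch: classify one mount point the way collectDiskStats does above.
async function checkDisk(path) {
  const { blocks, bavail, bsize } = await fs.promises.statfs(path)
  const total = blocks * bsize // total size of the file system in bytes
  const available = bavail * bsize // free space available to unprivileged users
  const availablePercent = (available / total) * 100
  return {
    path,
    availablePercent,
    lowDisk: availablePercent < 10,
    criticalLowDisk: availablePercent < 3,
  }
}

// checkDisk('/tmp').then(console.log)
```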
diff --git a/services/clsi/app/js/RequestParser.js b/services/clsi/app/js/RequestParser.js
index 28e182ea44..4e9d722921 100644
--- a/services/clsi/app/js/RequestParser.js
+++ b/services/clsi/app/js/RequestParser.js
@@ -1,7 +1,9 @@
const settings = require('@overleaf/settings')
+const OutputCacheManager = require('./OutputCacheManager')
const VALID_COMPILERS = ['pdflatex', 'latex', 'xelatex', 'lualatex']
const MAX_TIMEOUT = 600
+const EDITOR_ID_REGEX = /^[a-f0-9-]{36}$/ // UUID
function parse(body, callback) {
const response = {}
@@ -27,12 +29,24 @@ function parse(body, callback) {
default: '',
type: 'string',
}),
+ // Will be populated later. Must always be populated for prom library.
+ compile: 'initial',
}
response.compiler = _parseAttribute('compiler', compile.options.compiler, {
validValues: VALID_COMPILERS,
default: 'pdflatex',
type: 'string',
})
+ response.compileFromClsiCache = _parseAttribute(
+ 'compileFromClsiCache',
+ compile.options.compileFromClsiCache,
+ { default: false, type: 'boolean' }
+ )
+ response.populateClsiCache = _parseAttribute(
+ 'populateClsiCache',
+ compile.options.populateClsiCache,
+ { default: false, type: 'boolean' }
+ )
response.enablePdfCaching = _parseAttribute(
'enablePdfCaching',
compile.options.enablePdfCaching,
@@ -135,6 +149,15 @@ function parse(body, callback) {
}
)
response.rootResourcePath = _checkPath(rootResourcePath)
+
+ response.editorId = _parseAttribute('editorId', compile.options.editorId, {
+ type: 'string',
+ regex: EDITOR_ID_REGEX,
+ })
+ response.buildId = _parseAttribute('buildId', compile.options.buildId, {
+ type: 'string',
+ regex: OutputCacheManager.BUILD_REGEX,
+ })
} catch (error1) {
const error = error1
return callback(error)
@@ -199,6 +222,13 @@ function _parseAttribute(name, attribute, options) {
throw new Error(`${name} attribute should be a ${options.type}`)
}
}
+ if (options.type === 'string' && options.regex instanceof RegExp) {
+ if (!options.regex.test(attribute)) {
+ throw new Error(
+ `${name} attribute does not match regex ${options.regex}`
+ )
+ }
+ }
} else {
if (options.default != null) {
return options.default
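
> The new `regex` option for string attributes behaves as sketched below, using the `editorId` UUID pattern added above; the sample values are illustrative only:

```js
// Sketch of the string + regex validation path added to _parseAttribute above.
const EDITOR_ID_REGEX = /^[a-f0-9-]{36}$/ // UUID

function parseStringAttribute(name, attribute, regex) {
  if (attribute == null) return undefined
  if (typeof attribute !== 'string') {
    throw new Error(`${name} attribute should be a string`)
  }
  if (regex && !regex.test(attribute)) {
    throw new Error(`${name} attribute does not match regex ${regex}`)
  }
  return attribute
}

parseStringAttribute(
  'editorId',
  '123e4567-e89b-12d3-a456-426614174000',
  EDITOR_ID_REGEX
) // returns the value
// parseStringAttribute('editorId', '../evil', EDITOR_ID_REGEX) // throws
```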
diff --git a/services/clsi/app/js/ResourceWriter.js b/services/clsi/app/js/ResourceWriter.js
index 6fa6f85e1f..bf88538746 100644
--- a/services/clsi/app/js/ResourceWriter.js
+++ b/services/clsi/app/js/ResourceWriter.js
@@ -200,73 +200,22 @@ module.exports = ResourceWriter = {
return OutputFileFinder.findOutputFiles(
resources,
basePath,
- function (error, outputFiles, allFiles) {
+ (error, outputFiles, allFiles) => {
if (error != null) {
return callback(error)
}
const jobs = []
- for (const file of Array.from(outputFiles || [])) {
- ;(function (file) {
- const { path } = file
- let shouldDelete = true
- if (
- path.match(/^output\./) ||
- path.match(/\.aux$/) ||
- path.match(/^cache\//)
- ) {
- // knitr cache
- shouldDelete = false
- }
- if (path.match(/^output-.*/)) {
- // Tikz cached figures (default case)
- shouldDelete = false
- }
- if (path.match(/\.(pdf|dpth|md5)$/)) {
- // Tikz cached figures (by extension)
- shouldDelete = false
- }
- if (
- path.match(/\.(pygtex|pygstyle)$/) ||
- path.match(/(^|\/)_minted-[^\/]+\//)
- ) {
- // minted files/directory
- shouldDelete = false
- }
- if (
- path.match(/\.md\.tex$/) ||
- path.match(/(^|\/)_markdown_[^\/]+\//)
- ) {
- // markdown files/directory
- shouldDelete = false
- }
- if (path.match(/-eps-converted-to\.pdf$/)) {
- // Epstopdf generated files
- shouldDelete = false
- }
- if (
- path === 'output.pdf' ||
- path === 'output.dvi' ||
- path === 'output.log' ||
- path === 'output.xdv' ||
- path === 'output.stdout' ||
- path === 'output.stderr'
- ) {
- shouldDelete = true
- }
- if (path === 'output.tex') {
- // created by TikzManager if present in output files
- shouldDelete = true
- }
- if (shouldDelete) {
- return jobs.push(callback =>
- ResourceWriter._deleteFileIfNotDirectory(
- Path.join(basePath, path),
- callback
- )
+ for (const { path } of outputFiles || []) {
+ const shouldDelete = ResourceWriter.isExtraneousFile(path)
+ if (shouldDelete) {
+ jobs.push(callback =>
+ ResourceWriter._deleteFileIfNotDirectory(
+ Path.join(basePath, path),
+ callback
)
- }
- })(file)
+ )
+ }
}
return async.series(jobs, function (error) {
@@ -279,6 +228,59 @@ module.exports = ResourceWriter = {
)
},
+ isExtraneousFile(path) {
+ let shouldDelete = true
+ if (
+ path.match(/^output\./) ||
+ path.match(/\.aux$/) ||
+ path.match(/^cache\//)
+ ) {
+ // knitr cache
+ shouldDelete = false
+ }
+ if (path.match(/^output-.*/)) {
+ // Tikz cached figures (default case)
+ shouldDelete = false
+ }
+ if (path.match(/\.(pdf|dpth|md5)$/)) {
+ // Tikz cached figures (by extension)
+ shouldDelete = false
+ }
+ if (
+ path.match(/\.(pygtex|pygstyle)$/) ||
+ path.match(/(^|\/)_minted-[^\/]+\//)
+ ) {
+ // minted files/directory
+ shouldDelete = false
+ }
+ if (path.match(/\.md\.tex$/) || path.match(/(^|\/)_markdown_[^\/]+\//)) {
+ // markdown files/directory
+ shouldDelete = false
+ }
+ if (path.match(/-eps-converted-to\.pdf$/)) {
+ // Epstopdf generated files
+ shouldDelete = false
+ }
+ if (
+ path === 'output.tar.gz' ||
+ path === 'output.synctex.gz' ||
+ path === 'output.pdfxref' ||
+ path === 'output.pdf' ||
+ path === 'output.dvi' ||
+ path === 'output.log' ||
+ path === 'output.xdv' ||
+ path === 'output.stdout' ||
+ path === 'output.stderr'
+ ) {
+ shouldDelete = true
+ }
+ if (path === 'output.tex') {
+ // created by TikzManager if present in output files
+ shouldDelete = true
+ }
+ return shouldDelete
+ },
+
_deleteFileIfNotDirectory(path, callback) {
if (callback == null) {
callback = function () {}
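
> For reference, how the extracted `isExtraneousFile` classifies a few typical paths (required from `ResourceWriter`, as `CLSICacheHandler.js` does above):

```js
const { isExtraneousFile } = require('./ResourceWriter')

// true  -> removed before syncing resources / omitted from output.tar.gz
// false -> kept as a cached intermediate
console.log(isExtraneousFile('output.pdf')) // true
console.log(isExtraneousFile('output.tar.gz')) // true
console.log(isExtraneousFile('main.aux')) // false (kept: .aux)
console.log(isExtraneousFile('_minted-main/default.pygtex')) // false (kept: minted)
console.log(isExtraneousFile('figure-eps-converted-to.pdf')) // false (kept: epstopdf)
```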
diff --git a/services/clsi/buildscript.txt b/services/clsi/buildscript.txt
index 591c0ef68b..09c21888df 100644
--- a/services/clsi/buildscript.txt
+++ b/services/clsi/buildscript.txt
@@ -1,11 +1,11 @@
clsi
--data-dirs=cache,compiles,output
--dependencies=
---docker-repos=gcr.io/overleaf-ops,us-east1-docker.pkg.dev/overleaf-ops/ol-docker
---env-add=ENABLE_PDF_CACHING="true",PDF_CACHING_ENABLE_WORKER_POOL="true",ALLOWED_IMAGES=quay.io/sharelatex/texlive-full:2017.1,TEXLIVE_IMAGE=quay.io/sharelatex/texlive-full:2017.1,TEX_LIVE_IMAGE_NAME_OVERRIDE=gcr.io/overleaf-ops,TEXLIVE_IMAGE_USER="tex",DOCKER_RUNNER="true",COMPILES_HOST_DIR=$PWD/compiles
+--docker-repos=us-east1-docker.pkg.dev/overleaf-ops/ol-docker
+--env-add=ENABLE_PDF_CACHING="true",PDF_CACHING_ENABLE_WORKER_POOL="true",ALLOWED_IMAGES=quay.io/sharelatex/texlive-full:2017.1,TEXLIVE_IMAGE=quay.io/sharelatex/texlive-full:2017.1,TEX_LIVE_IMAGE_NAME_OVERRIDE=us-east1-docker.pkg.dev/overleaf-ops/ol-docker,TEXLIVE_IMAGE_USER="tex",SANDBOXED_COMPILES="true",SANDBOXED_COMPILES_HOST_DIR_COMPILES=$PWD/compiles,SANDBOXED_COMPILES_HOST_DIR_OUTPUT=$PWD/output
--env-pass-through=
--esmock-loader=False
---node-version=20.18.0
+--node-version=22.17.0
--public-repo=True
---script-version=4.5.0
+--script-version=4.7.0
--use-large-ci-runner=True
diff --git a/services/clsi/config/settings.defaults.js b/services/clsi/config/settings.defaults.js
index 21fe82e533..bd5614eb98 100644
--- a/services/clsi/config/settings.defaults.js
+++ b/services/clsi/config/settings.defaults.js
@@ -1,11 +1,8 @@
const Path = require('node:path')
const os = require('node:os')
-const http = require('node:http')
-const https = require('node:https')
-http.globalAgent.keepAlive = false
-https.globalAgent.keepAlive = false
-const isPreEmptible = os.hostname().includes('pre-emp')
+const isPreEmptible = process.env.PREEMPTIBLE === 'TRUE'
+const CLSI_SERVER_ID = os.hostname().replace('-ctr', '')
module.exports = {
compileSizeLimit: process.env.COMPILE_SIZE_LIMIT || '7mb',
@@ -37,6 +34,10 @@ module.exports = {
report_load: process.env.LOAD_BALANCER_AGENT_REPORT_LOAD !== 'false',
load_port: 3048,
local_port: 3049,
+ allow_maintenance:
+ (
+ process.env.LOAD_BALANCER_AGENT_ALLOW_MAINTENANCE ?? ''
+ ).toLowerCase() !== 'false',
},
},
apis: {
@@ -45,12 +46,19 @@ module.exports = {
url: `http://${process.env.CLSI_HOST || '127.0.0.1'}:3013`,
// External url prefix for output files, e.g. for requests via load-balancers.
outputUrlPrefix: `${process.env.ZONE ? `/zone/${process.env.ZONE}` : ''}`,
+ clsiServerId: process.env.CLSI_SERVER_ID || CLSI_SERVER_ID,
+
+ downloadHost: process.env.DOWNLOAD_HOST || 'http://localhost:3013',
},
clsiPerf: {
host: `${process.env.CLSI_PERF_HOST || '127.0.0.1'}:${
process.env.CLSI_PERF_PORT || '3043'
}`,
},
+ clsiCache: {
+ enabled: !!process.env.CLSI_CACHE_SHARDS,
+ shards: JSON.parse(process.env.CLSI_CACHE_SHARDS || '[]'),
+ },
},
smokeTest: process.env.SMOKE_TEST || false,
@@ -85,20 +93,21 @@ if (process.env.ALLOWED_COMPILE_GROUPS) {
}
}
-if (process.env.DOCKER_RUNNER) {
- let seccompProfilePath
+if ((process.env.DOCKER_RUNNER || process.env.SANDBOXED_COMPILES) === 'true') {
module.exports.clsi = {
- dockerRunner: process.env.DOCKER_RUNNER === 'true',
+ dockerRunner: true,
docker: {
runtime: process.env.DOCKER_RUNTIME,
image:
- process.env.TEXLIVE_IMAGE || 'quay.io/sharelatex/texlive-full:2017.1',
+ process.env.TEXLIVE_IMAGE ||
+ process.env.TEX_LIVE_DOCKER_IMAGE ||
+ 'quay.io/sharelatex/texlive-full:2017.1',
env: {
HOME: '/tmp',
CLSI: 1,
},
socketPath: '/var/run/docker.sock',
- user: process.env.TEXLIVE_IMAGE_USER || 'tex',
+ user: process.env.TEXLIVE_IMAGE_USER || 'www-data',
},
optimiseInDocker: true,
expireProjectAfterIdleMs: 24 * 60 * 60 * 1000,
@@ -118,6 +127,7 @@ if (process.env.DOCKER_RUNNER) {
const defaultCompileGroupConfig = {
wordcount: { 'HostConfig.AutoRemove': true },
synctex: { 'HostConfig.AutoRemove': true },
+ 'synctex-output': { 'HostConfig.AutoRemove': true },
}
module.exports.clsi.docker.compileGroupConfig = Object.assign(
defaultCompileGroupConfig,
@@ -128,11 +138,14 @@ if (process.env.DOCKER_RUNNER) {
process.exit(1)
}
+ let seccompProfilePath
try {
seccompProfilePath = Path.resolve(__dirname, '../seccomp/clsi-profile.json')
- module.exports.clsi.docker.seccomp_profile = JSON.stringify(
- JSON.parse(require('node:fs').readFileSync(seccompProfilePath))
- )
+ module.exports.clsi.docker.seccomp_profile =
+ process.env.SECCOMP_PROFILE ||
+ JSON.stringify(
+ JSON.parse(require('node:fs').readFileSync(seccompProfilePath))
+ )
} catch (error) {
console.error(
error,
@@ -162,5 +175,23 @@ if (process.env.DOCKER_RUNNER) {
module.exports.path.synctexBaseDir = () => '/compile'
- module.exports.path.sandboxedCompilesHostDir = process.env.COMPILES_HOST_DIR
+ module.exports.path.sandboxedCompilesHostDirCompiles =
+ process.env.SANDBOXED_COMPILES_HOST_DIR_COMPILES ||
+ process.env.SANDBOXED_COMPILES_HOST_DIR ||
+ process.env.COMPILES_HOST_DIR
+ if (!module.exports.path.sandboxedCompilesHostDirCompiles) {
+ throw new Error(
+ 'SANDBOXED_COMPILES enabled, but SANDBOXED_COMPILES_HOST_DIR_COMPILES not set'
+ )
+ }
+
+ module.exports.path.sandboxedCompilesHostDirOutput =
+ process.env.SANDBOXED_COMPILES_HOST_DIR_OUTPUT ||
+ process.env.OUTPUT_HOST_DIR
+ if (!module.exports.path.sandboxedCompilesHostDirOutput) {
+ // TODO(das7pad): Enforce in a future major version of Server Pro.
+ // throw new Error(
+ // 'SANDBOXED_COMPILES enabled, but SANDBOXED_COMPILES_HOST_DIR_OUTPUT not set'
+ // )
+ }
}
diff --git a/services/clsi/docker-compose.ci.yml b/services/clsi/docker-compose.ci.yml
index 00f54c6e72..77a45615b7 100644
--- a/services/clsi/docker-compose.ci.yml
+++ b/services/clsi/docker-compose.ci.yml
@@ -27,10 +27,11 @@ services:
PDF_CACHING_ENABLE_WORKER_POOL: "true"
ALLOWED_IMAGES: quay.io/sharelatex/texlive-full:2017.1
TEXLIVE_IMAGE: quay.io/sharelatex/texlive-full:2017.1
- TEX_LIVE_IMAGE_NAME_OVERRIDE: gcr.io/overleaf-ops
+ TEX_LIVE_IMAGE_NAME_OVERRIDE: us-east1-docker.pkg.dev/overleaf-ops/ol-docker
TEXLIVE_IMAGE_USER: "tex"
- DOCKER_RUNNER: "true"
- COMPILES_HOST_DIR: $PWD/compiles
+ SANDBOXED_COMPILES: "true"
+ SANDBOXED_COMPILES_HOST_DIR_COMPILES: $PWD/compiles
+ SANDBOXED_COMPILES_HOST_DIR_OUTPUT: $PWD/output
volumes:
- ./compiles:/overleaf/services/clsi/compiles
- /var/run/docker.sock:/var/run/docker.sock
diff --git a/services/clsi/docker-compose.yml b/services/clsi/docker-compose.yml
index c72fb8b2c4..b8112a8e17 100644
--- a/services/clsi/docker-compose.yml
+++ b/services/clsi/docker-compose.yml
@@ -17,6 +17,7 @@ services:
working_dir: /overleaf/services/clsi
environment:
MOCHA_GREP: ${MOCHA_GREP}
+ LOG_LEVEL: ${LOG_LEVEL:-}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
command: npm run --silent test:unit
@@ -37,16 +38,17 @@ services:
MONGO_HOST: mongo
POSTGRES_HOST: postgres
MOCHA_GREP: ${MOCHA_GREP}
- LOG_LEVEL: ERROR
+ LOG_LEVEL: ${LOG_LEVEL:-}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
ENABLE_PDF_CACHING: "true"
PDF_CACHING_ENABLE_WORKER_POOL: "true"
ALLOWED_IMAGES: quay.io/sharelatex/texlive-full:2017.1
TEXLIVE_IMAGE: quay.io/sharelatex/texlive-full:2017.1
- TEX_LIVE_IMAGE_NAME_OVERRIDE: gcr.io/overleaf-ops
+ TEX_LIVE_IMAGE_NAME_OVERRIDE: us-east1-docker.pkg.dev/overleaf-ops/ol-docker
TEXLIVE_IMAGE_USER: "tex"
- DOCKER_RUNNER: "true"
- COMPILES_HOST_DIR: $PWD/compiles
+ SANDBOXED_COMPILES: "true"
+ SANDBOXED_COMPILES_HOST_DIR_COMPILES: $PWD/compiles
+ SANDBOXED_COMPILES_HOST_DIR_OUTPUT: $PWD/output
command: npm run --silent test:acceptance
diff --git a/services/clsi/entrypoint.sh b/services/clsi/entrypoint.sh
index 9446ab9e2d..b45899ab17 100755
--- a/services/clsi/entrypoint.sh
+++ b/services/clsi/entrypoint.sh
@@ -2,13 +2,12 @@
# add the node user to the docker group on the host
DOCKER_GROUP=$(stat -c '%g' /var/run/docker.sock)
-groupadd --non-unique --gid ${DOCKER_GROUP} dockeronhost
+groupadd --non-unique --gid "${DOCKER_GROUP}" dockeronhost
usermod -aG dockeronhost node
# compatibility: initial volume setup
mkdir -p /overleaf/services/clsi/cache && chown node:node /overleaf/services/clsi/cache
mkdir -p /overleaf/services/clsi/compiles && chown node:node /overleaf/services/clsi/compiles
-mkdir -p /overleaf/services/clsi/db && chown node:node /overleaf/services/clsi/db
mkdir -p /overleaf/services/clsi/output && chown node:node /overleaf/services/clsi/output
exec runuser -u node -- "$@"
diff --git a/services/clsi/kube.yaml b/services/clsi/kube.yaml
deleted file mode 100644
index d3fb04291e..0000000000
--- a/services/clsi/kube.yaml
+++ /dev/null
@@ -1,41 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
- name: clsi
- namespace: default
-spec:
- type: LoadBalancer
- ports:
- - port: 80
- protocol: TCP
- targetPort: 80
- selector:
- run: clsi
----
-apiVersion: extensions/v1beta1
-kind: Deployment
-metadata:
- name: clsi
- namespace: default
-spec:
- replicas: 2
- template:
- metadata:
- labels:
- run: clsi
- spec:
- containers:
- - name: clsi
- image: gcr.io/henry-terraform-admin/clsi
- imagePullPolicy: Always
- readinessProbe:
- httpGet:
- path: status
- port: 80
- periodSeconds: 5
- initialDelaySeconds: 0
- failureThreshold: 3
- successThreshold: 1
-
-
-
diff --git a/services/clsi/nginx.conf b/services/clsi/nginx.conf
index 2290aeb444..604eb93fbf 100644
--- a/services/clsi/nginx.conf
+++ b/services/clsi/nginx.conf
@@ -46,7 +46,7 @@ server {
}
# handle output files for specific users
- location ~ ^/project/([0-9a-f]+)/user/([0-9a-f]+)/build/([0-9a-f-]+)/output/output\.([a-z]+)$ {
+ location ~ ^/project/([0-9a-f]+)/user/([0-9a-f]+)/build/([0-9a-f-]+)/output/output\.([a-z.]+)$ {
if ($request_method = 'OPTIONS') {
# handle OPTIONS method for CORS requests
add_header 'Allow' 'GET,HEAD';
@@ -64,7 +64,7 @@ server {
alias /output/$1-$2/generated-files/$3/$4.blg;
}
# handle output files for anonymous users
- location ~ ^/project/([0-9a-f]+)/build/([0-9a-f-]+)/output/output\.([a-z]+)$ {
+ location ~ ^/project/([0-9a-f]+)/build/([0-9a-f-]+)/output/output\.([a-z.]+)$ {
if ($request_method = 'OPTIONS') {
# handle OPTIONS method for CORS requests
add_header 'Allow' 'GET,HEAD';
diff --git a/services/clsi/package.json b/services/clsi/package.json
index 980a893803..fe31c430bd 100644
--- a/services/clsi/package.json
+++ b/services/clsi/package.json
@@ -23,24 +23,24 @@
"@overleaf/o-error": "*",
"@overleaf/promise-utils": "*",
"@overleaf/settings": "*",
+ "@overleaf/stream-utils": "*",
"archiver": "5.3.2",
"async": "^3.2.5",
"body-parser": "^1.20.3",
"bunyan": "^1.8.15",
- "diskusage": "^1.1.3",
- "dockerode": "^3.1.0",
- "express": "^4.21.0",
+ "dockerode": "^4.0.7",
+ "express": "^4.21.2",
"lodash": "^4.17.21",
"p-limit": "^3.1.0",
"request": "^2.88.2",
"send": "^0.19.0",
+ "tar-fs": "^3.0.9",
"workerpool": "^6.1.5"
},
"devDependencies": {
- "@types/workerpool": "^6.1.0",
"chai": "^4.3.6",
"chai-as-promised": "^7.1.1",
- "mocha": "^10.2.0",
+ "mocha": "^11.1.0",
"mock-fs": "^5.1.2",
"node-fetch": "^2.7.0",
"sandboxed-module": "^2.0.4",
diff --git a/services/clsi/seccomp/clsi-profile.json b/services/clsi/seccomp/clsi-profile.json
index 084354b15c..ad95130f76 100644
--- a/services/clsi/seccomp/clsi-profile.json
+++ b/services/clsi/seccomp/clsi-profile.json
@@ -829,13 +829,19 @@
"args": []
},
{
- "name": "gettimeofday",
- "action": "SCMP_ACT_ALLOW",
- "args": []
- }, {
- "name": "epoll_pwait",
- "action": "SCMP_ACT_ALLOW",
- "args": []
+ "name": "gettimeofday",
+ "action": "SCMP_ACT_ALLOW",
+ "args": []
+ },
+ {
+ "name": "epoll_pwait",
+ "action": "SCMP_ACT_ALLOW",
+ "args": []
+ },
+ {
+ "name": "poll",
+ "action": "SCMP_ACT_ALLOW",
+ "args": []
}
]
-}
\ No newline at end of file
+}
diff --git a/services/clsi/test/acceptance/js/AllowedImageNamesTests.js b/services/clsi/test/acceptance/js/AllowedImageNamesTests.js
index 897f5d9c85..9cd7a65930 100644
--- a/services/clsi/test/acceptance/js/AllowedImageNamesTests.js
+++ b/services/clsi/test/acceptance/js/AllowedImageNamesTests.js
@@ -109,6 +109,7 @@ Hello world
width: 343.71106,
},
],
+ downloadedFromCache: false,
})
done()
}
@@ -146,6 +147,7 @@ Hello world
expect(error).to.not.exist
expect(result).to.deep.equal({
code: [{ file: 'main.tex', line: 3, column: -1 }],
+ downloadedFromCache: false,
})
done()
}
diff --git a/services/clsi/test/acceptance/js/BrokenLatexFileTests.js b/services/clsi/test/acceptance/js/BrokenLatexFileTests.js
index 71e9956c0d..46d07da092 100644
--- a/services/clsi/test/acceptance/js/BrokenLatexFileTests.js
+++ b/services/clsi/test/acceptance/js/BrokenLatexFileTests.js
@@ -11,6 +11,7 @@
const Client = require('./helpers/Client')
const request = require('request')
const ClsiApp = require('./helpers/ClsiApp')
+const { expect } = require('chai')
describe('Broken LaTeX file', function () {
before(function (done) {
@@ -58,9 +59,27 @@ Hello world
)
})
- return it('should return a failure status', function () {
+ it('should return a failure status', function () {
return this.body.compile.status.should.equal('failure')
})
+
+ it('should return isInitialCompile flag', function () {
+ expect(this.body.compile.stats.isInitialCompile).to.equal(1)
+ })
+
+ it('should return output files', function () {
+ // NOTE: No output.pdf file.
+ this.body.compile.outputFiles
+ .map(f => f.path)
+ .should.deep.equal([
+ 'output.aux',
+ 'output.fdb_latexmk',
+ 'output.fls',
+ 'output.log',
+ 'output.stderr',
+ 'output.stdout',
+ ])
+ })
})
return describe('on second run', function () {
@@ -80,8 +99,26 @@ Hello world
})
})
- return it('should return a failure status', function () {
+ it('should return a failure status', function () {
return this.body.compile.status.should.equal('failure')
})
+
+ it('should not return isInitialCompile flag', function () {
+ expect(this.body.compile.stats.isInitialCompile).to.not.exist
+ })
+
+ it('should return output files', function () {
+ // NOTE: No output.pdf file.
+ this.body.compile.outputFiles
+ .map(f => f.path)
+ .should.deep.equal([
+ 'output.aux',
+ 'output.fdb_latexmk',
+ 'output.fls',
+ 'output.log',
+ 'output.stderr',
+ 'output.stdout',
+ ])
+ })
})
})
diff --git a/services/clsi/test/acceptance/js/StopCompile.js b/services/clsi/test/acceptance/js/StopCompile.js
new file mode 100644
index 0000000000..103a70f37d
--- /dev/null
+++ b/services/clsi/test/acceptance/js/StopCompile.js
@@ -0,0 +1,47 @@
+const Client = require('./helpers/Client')
+const ClsiApp = require('./helpers/ClsiApp')
+const { expect } = require('chai')
+
+describe('Stop compile', function () {
+ before(function (done) {
+ this.request = {
+ options: {
+ timeout: 100, // seconds
+ },
+ resources: [
+ {
+ path: 'main.tex',
+ content: `\
+\\documentclass{article}
+\\begin{document}
+\\def\\x{Hello!\\par\\x}
+\\x
+\\end{document}\
+`,
+ },
+ ],
+ }
+ this.project_id = Client.randomId()
+ ClsiApp.ensureRunning(() => {
+ // start the compile in the background
+ Client.compile(this.project_id, this.request, (error, res, body) => {
+ this.compileResult = { error, res, body }
+ })
+ // wait for 1 second before stopping the compile
+ setTimeout(() => {
+ Client.stopCompile(this.project_id, (error, res, body) => {
+ this.stopResult = { error, res, body }
+ setTimeout(done, 1000) // allow time for the compile request to terminate
+ })
+ }, 1000)
+ })
+ })
+
+ it('should force a compile response with an error status', function () {
+ expect(this.stopResult.error).to.be.null
+ expect(this.stopResult.res.statusCode).to.equal(204)
+ expect(this.compileResult.res.statusCode).to.equal(200)
+ expect(this.compileResult.body.compile.status).to.equal('terminated')
+ expect(this.compileResult.body.compile.error).to.equal('terminated')
+ })
+})
diff --git a/services/clsi/test/acceptance/js/SynctexTests.js b/services/clsi/test/acceptance/js/SynctexTests.js
index 5ba5bb5b5f..049f260259 100644
--- a/services/clsi/test/acceptance/js/SynctexTests.js
+++ b/services/clsi/test/acceptance/js/SynctexTests.js
@@ -67,6 +67,7 @@ Hello world
width: 343.71106,
},
],
+ downloadedFromCache: false,
})
return done()
}
@@ -87,6 +88,7 @@ Hello world
}
expect(codePositions).to.deep.equal({
code: [{ file: 'main.tex', line: 3, column: -1 }],
+ downloadedFromCache: false,
})
return done()
}
diff --git a/services/clsi/test/acceptance/js/TimeoutTests.js b/services/clsi/test/acceptance/js/TimeoutTests.js
index bca8ae71d2..e9175d223c 100644
--- a/services/clsi/test/acceptance/js/TimeoutTests.js
+++ b/services/clsi/test/acceptance/js/TimeoutTests.js
@@ -11,6 +11,7 @@
const Client = require('./helpers/Client')
const request = require('request')
const ClsiApp = require('./helpers/ClsiApp')
+const { expect } = require('chai')
describe('Timed out compile', function () {
before(function (done) {
@@ -54,6 +55,10 @@ describe('Timed out compile', function () {
return this.body.compile.status.should.equal('timedout')
})
+ it('should return isInitialCompile flag', function () {
+ expect(this.body.compile.stats.isInitialCompile).to.equal(1)
+ })
+
return it('should return the log output file name', function () {
const outputFilePaths = this.body.compile.outputFiles.map(x => x.path)
return outputFilePaths.should.include('output.log')
diff --git a/services/clsi/test/acceptance/js/helpers/Client.js b/services/clsi/test/acceptance/js/helpers/Client.js
index a0bdce734f..49bf7390c6 100644
--- a/services/clsi/test/acceptance/js/helpers/Client.js
+++ b/services/clsi/test/acceptance/js/helpers/Client.js
@@ -42,6 +42,16 @@ module.exports = Client = {
)
},
+ stopCompile(projectId, callback) {
+ if (callback == null) {
+ callback = function () {}
+ }
+ return request.post(
+ { url: `${this.host}/project/${projectId}/compile/stop` },
+ callback
+ )
+ },
+
clearCache(projectId, callback) {
if (callback == null) {
callback = function () {}
diff --git a/services/clsi/test/acceptance/js/helpers/ClsiApp.js b/services/clsi/test/acceptance/js/helpers/ClsiApp.js
index 4736315df8..38308e9129 100644
--- a/services/clsi/test/acceptance/js/helpers/ClsiApp.js
+++ b/services/clsi/test/acceptance/js/helpers/ClsiApp.js
@@ -10,8 +10,6 @@
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
const app = require('../../../../app')
-require('@overleaf/logger').logger.level('info')
-const logger = require('@overleaf/logger')
const Settings = require('@overleaf/settings')
module.exports = {
@@ -37,7 +35,6 @@ module.exports = {
throw error
}
this.running = true
- logger.info('clsi running in dev mode')
return (() => {
const result = []
diff --git a/services/clsi/test/setup.js b/services/clsi/test/setup.js
index 19e1ae7165..b17507bf92 100644
--- a/services/clsi/test/setup.js
+++ b/services/clsi/test/setup.js
@@ -20,7 +20,7 @@ SandboxedModule.configure({
err() {},
},
},
- globals: { Buffer, console, process, URL },
+ globals: { Buffer, console, process, URL, Math },
sourceTransformers: {
removeNodePrefix: function (source) {
return source.replace(/require\(['"]node:/g, "require('")
diff --git a/services/clsi/test/unit/js/CompileControllerTests.js b/services/clsi/test/unit/js/CompileControllerTests.js
index d97e433f29..2ac8d9c2d7 100644
--- a/services/clsi/test/unit/js/CompileControllerTests.js
+++ b/services/clsi/test/unit/js/CompileControllerTests.js
@@ -1,54 +1,11 @@
const SandboxedModule = require('sandboxed-module')
const sinon = require('sinon')
-const { expect } = require('chai')
const modulePath = require('node:path').join(
__dirname,
'../../../app/js/CompileController'
)
const Errors = require('../../../app/js/Errors')
-function tryImageNameValidation(method, imageNameField) {
- describe('when allowedImages is set', function () {
- beforeEach(function () {
- this.Settings.clsi = { docker: {} }
- this.Settings.clsi.docker.allowedImages = [
- 'repo/image:tag1',
- 'repo/image:tag2',
- ]
- this.res.send = sinon.stub()
- this.res.status = sinon.stub().returns({ send: this.res.send })
-
- this.CompileManager[method].reset()
- })
-
- describe('with an invalid image', function () {
- beforeEach(function () {
- this.req.query[imageNameField] = 'something/evil:1337'
- this.CompileController[method](this.req, this.res, this.next)
- })
- it('should return a 400', function () {
- expect(this.res.status.calledWith(400)).to.equal(true)
- })
- it('should not run the query', function () {
- expect(this.CompileManager[method].called).to.equal(false)
- })
- })
-
- describe('with a valid image', function () {
- beforeEach(function () {
- this.req.query[imageNameField] = 'repo/image:tag1'
- this.CompileController[method](this.req, this.res, this.next)
- })
- it('should not return a 400', function () {
- expect(this.res.status.calledWith(400)).to.equal(false)
- })
- it('should run the query', function () {
- expect(this.CompileManager[method].called).to.equal(true)
- })
- })
- })
-}
-
describe('CompileController', function () {
beforeEach(function () {
this.buildId = 'build-id-123'
@@ -61,6 +18,11 @@ describe('CompileController', function () {
clsi: {
url: 'http://clsi.example.com',
outputUrlPrefix: '/zone/b',
+ downloadHost: 'http://localhost:3013',
+ },
+ clsiCache: {
+ enabled: false,
+ url: 'http://localhost:3044',
},
},
}),
@@ -68,6 +30,11 @@ describe('CompileController', function () {
Timer: sinon.stub().returns({ done: sinon.stub() }),
},
'./ProjectPersistenceManager': (this.ProjectPersistenceManager = {}),
+ './CLSICacheHandler': {
+ notifyCLSICacheAboutBuild: sinon.stub(),
+ downloadLatestCompileCache: sinon.stub().resolves(),
+ downloadOutputDotSynctexFromCompileCache: sinon.stub().resolves(),
+ },
'./Errors': (this.Erros = Errors),
},
})
@@ -113,16 +80,21 @@ describe('CompileController', function () {
this.timings = { bar: 2 }
this.res.status = sinon.stub().returnsThis()
this.res.send = sinon.stub()
+
+ this.CompileManager.doCompileWithLock = sinon
+ .stub()
+ .callsFake((_req, stats, timings, cb) => {
+ Object.assign(stats, this.stats)
+ Object.assign(timings, this.timings)
+ cb(null, {
+ outputFiles: this.output_files,
+ buildId: this.buildId,
+ })
+ })
})
describe('successfully', function () {
beforeEach(function () {
- this.CompileManager.doCompileWithLock = sinon.stub().yields(null, {
- outputFiles: this.output_files,
- stats: this.stats,
- timings: this.timings,
- buildId: this.buildId,
- })
this.CompileController.compile(this.req, this.res)
})
@@ -157,6 +129,7 @@ describe('CompileController', function () {
url: `${this.Settings.apis.clsi.url}/project/${this.project_id}/build/${file.build}/output/${file.path}`,
...file,
})),
+ clsiCacheShard: undefined,
},
})
.should.equal(true)
@@ -166,12 +139,6 @@ describe('CompileController', function () {
describe('without a outputUrlPrefix', function () {
beforeEach(function () {
this.Settings.apis.clsi.outputUrlPrefix = ''
- this.CompileManager.doCompileWithLock = sinon.stub().yields(null, {
- outputFiles: this.output_files,
- stats: this.stats,
- timings: this.timings,
- buildId: this.buildId,
- })
this.CompileController.compile(this.req, this.res)
})
@@ -190,6 +157,7 @@ describe('CompileController', function () {
url: `${this.Settings.apis.clsi.url}/project/${this.project_id}/build/${file.build}/output/${file.path}`,
...file,
})),
+ clsiCacheShard: undefined,
},
})
.should.equal(true)
@@ -210,33 +178,36 @@ describe('CompileController', function () {
build: 1234,
},
]
- this.CompileManager.doCompileWithLock = sinon.stub().yields(null, {
- outputFiles: this.output_files,
- stats: this.stats,
- timings: this.timings,
- buildId: this.buildId,
- })
+ this.CompileManager.doCompileWithLock = sinon
+ .stub()
+ .callsFake((_req, stats, timings, cb) => {
+ Object.assign(stats, this.stats)
+ Object.assign(timings, this.timings)
+ cb(null, {
+ outputFiles: this.output_files,
+ buildId: this.buildId,
+ })
+ })
this.CompileController.compile(this.req, this.res)
})
it('should return the JSON response with status failure', function () {
this.res.status.calledWith(200).should.equal(true)
- this.res.send
- .calledWith({
- compile: {
- status: 'failure',
- error: null,
- stats: this.stats,
- timings: this.timings,
- outputUrlPrefix: '/zone/b',
- buildId: this.buildId,
- outputFiles: this.output_files.map(file => ({
- url: `${this.Settings.apis.clsi.url}/project/${this.project_id}/build/${file.build}/output/${file.path}`,
- ...file,
- })),
- },
- })
- .should.equal(true)
+ this.res.send.should.have.been.calledWith({
+ compile: {
+ status: 'failure',
+ error: null,
+ stats: this.stats,
+ timings: this.timings,
+ outputUrlPrefix: '/zone/b',
+ buildId: this.buildId,
+ outputFiles: this.output_files.map(file => ({
+ url: `${this.Settings.apis.clsi.url}/project/${this.project_id}/build/${file.build}/output/${file.path}`,
+ ...file,
+ })),
+ clsiCacheShard: undefined,
+ },
+ })
})
})
@@ -255,33 +226,36 @@ describe('CompileController', function () {
build: 1234,
},
]
- this.CompileManager.doCompileWithLock = sinon.stub().yields(null, {
- outputFiles: this.output_files,
- stats: this.stats,
- timings: this.timings,
- buildId: this.buildId,
- })
+ this.CompileManager.doCompileWithLock = sinon
+ .stub()
+ .callsFake((_req, stats, timings, cb) => {
+ Object.assign(stats, this.stats)
+ Object.assign(timings, this.timings)
+ cb(null, {
+ outputFiles: this.output_files,
+ buildId: this.buildId,
+ })
+ })
this.CompileController.compile(this.req, this.res)
})
it('should return the JSON response with status failure', function () {
this.res.status.calledWith(200).should.equal(true)
- this.res.send
- .calledWith({
- compile: {
- status: 'failure',
- error: null,
- stats: this.stats,
- buildId: this.buildId,
- timings: this.timings,
- outputUrlPrefix: '/zone/b',
- outputFiles: this.output_files.map(file => ({
- url: `${this.Settings.apis.clsi.url}/project/${this.project_id}/build/${file.build}/output/${file.path}`,
- ...file,
- })),
- },
- })
- .should.equal(true)
+ this.res.send.should.have.been.calledWith({
+ compile: {
+ status: 'failure',
+ error: null,
+ stats: this.stats,
+ buildId: this.buildId,
+ timings: this.timings,
+ outputUrlPrefix: '/zone/b',
+ outputFiles: this.output_files.map(file => ({
+ url: `${this.Settings.apis.clsi.url}/project/${this.project_id}/build/${file.build}/output/${file.path}`,
+ ...file,
+ })),
+ clsiCacheShard: undefined,
+ },
+ })
})
})
@@ -291,7 +265,11 @@ describe('CompileController', function () {
error.buildId = this.buildId
this.CompileManager.doCompileWithLock = sinon
.stub()
- .callsArgWith(1, error, null)
+ .callsFake((_req, stats, timings, cb) => {
+ Object.assign(stats, this.stats)
+ Object.assign(timings, this.timings)
+ cb(error)
+ })
this.CompileController.compile(this.req, this.res)
})
@@ -305,9 +283,9 @@ describe('CompileController', function () {
outputUrlPrefix: '/zone/b',
outputFiles: [],
buildId: this.buildId,
- // JSON.stringify will omit these
- stats: undefined,
- timings: undefined,
+ stats: this.stats,
+ timings: this.timings,
+ clsiCacheShard: undefined,
},
})
.should.equal(true)
@@ -321,7 +299,11 @@ describe('CompileController', function () {
)
this.CompileManager.doCompileWithLock = sinon
.stub()
- .callsArgWith(1, error, null)
+ .callsFake((_req, stats, timings, cb) => {
+ Object.assign(stats, this.stats)
+ Object.assign(timings, this.timings)
+ cb(error)
+ })
this.CompileController.compile(this.req, this.res)
})
@@ -334,9 +316,11 @@ describe('CompileController', function () {
error: 'too many concurrent compile requests',
outputUrlPrefix: '/zone/b',
outputFiles: [],
+ stats: this.stats,
+ timings: this.timings,
+ // JSON.stringify will omit these undefined values
buildId: undefined,
- stats: undefined,
- timings: undefined,
+ clsiCacheShard: undefined,
},
})
.should.equal(true)
@@ -349,7 +333,11 @@ describe('CompileController', function () {
this.error.timedout = true
this.CompileManager.doCompileWithLock = sinon
.stub()
- .callsArgWith(1, this.error, null)
+ .callsFake((_req, stats, timings, cb) => {
+ Object.assign(stats, this.stats)
+ Object.assign(timings, this.timings)
+ cb(this.error)
+ })
this.CompileController.compile(this.req, this.res)
})
@@ -362,10 +350,11 @@ describe('CompileController', function () {
error: this.message,
outputUrlPrefix: '/zone/b',
outputFiles: [],
- // JSON.stringify will omit these
+ stats: this.stats,
+ timings: this.timings,
+ // JSON.stringify will omit these undefined values
buildId: undefined,
- stats: undefined,
- timings: undefined,
+ clsiCacheShard: undefined,
},
})
.should.equal(true)
@@ -376,7 +365,11 @@ describe('CompileController', function () {
beforeEach(function () {
this.CompileManager.doCompileWithLock = sinon
.stub()
- .callsArgWith(1, null, [])
+ .callsFake((_req, stats, timings, cb) => {
+ Object.assign(stats, this.stats)
+ Object.assign(timings, this.timings)
+ cb(null, {})
+ })
this.CompileController.compile(this.req, this.res)
})
@@ -389,10 +382,11 @@ describe('CompileController', function () {
status: 'failure',
outputUrlPrefix: '/zone/b',
outputFiles: [],
- // JSON.stringify will omit these
+ stats: this.stats,
+ timings: this.timings,
+ // JSON.stringify will omit these undefined values
buildId: undefined,
- stats: undefined,
- timings: undefined,
+ clsiCacheShard: undefined,
},
})
.should.equal(true)
@@ -416,7 +410,7 @@ describe('CompileController', function () {
this.CompileManager.syncFromCode = sinon
.stub()
- .yields(null, (this.pdfPositions = ['mock-positions']))
+ .yields(null, (this.pdfPositions = ['mock-positions']), true)
this.CompileController.syncFromCode(this.req, this.res, this.next)
})
@@ -436,11 +430,10 @@ describe('CompileController', function () {
this.res.json
.calledWith({
pdf: this.pdfPositions,
+ downloadedFromCache: true,
})
.should.equal(true)
})
-
- tryImageNameValidation('syncFromCode', 'imageName')
})
describe('syncFromPdf', function () {
@@ -459,7 +452,7 @@ describe('CompileController', function () {
this.CompileManager.syncFromPdf = sinon
.stub()
- .yields(null, (this.codePositions = ['mock-positions']))
+ .yields(null, (this.codePositions = ['mock-positions']), true)
this.CompileController.syncFromPdf(this.req, this.res, this.next)
})
@@ -473,11 +466,10 @@ describe('CompileController', function () {
this.res.json
.calledWith({
code: this.codePositions,
+ downloadedFromCache: true,
})
.should.equal(true)
})
-
- tryImageNameValidation('syncFromPdf', 'imageName')
})
describe('wordcount', function () {
@@ -511,7 +503,5 @@ describe('CompileController', function () {
})
.should.equal(true)
})
-
- tryImageNameValidation('wordcount', 'image')
})
})
diff --git a/services/clsi/test/unit/js/CompileManagerTests.js b/services/clsi/test/unit/js/CompileManagerTests.js
index 8d7aff4910..30ef538ac3 100644
--- a/services/clsi/test/unit/js/CompileManagerTests.js
+++ b/services/clsi/test/unit/js/CompileManagerTests.js
@@ -35,7 +35,7 @@ describe('CompileManager', function () {
build: 1234,
},
]
- this.buildId = 'build-id-123'
+ this.buildId = '00000000000-0000000000000000'
this.commandOutput = 'Dummy output'
this.compileBaseDir = '/compile/dir'
this.outputBaseDir = '/output/dir'
@@ -61,7 +61,10 @@ describe('CompileManager', function () {
},
}
this.OutputCacheManager = {
+ BUILD_REGEX: /^[0-9a-f]+-[0-9a-f]+$/,
+ CACHE_SUBDIR: 'generated-files',
promises: {
+ queueDirOperation: sinon.stub().callsArg(1),
saveOutputFiles: sinon
.stub()
.resolves({ outputFiles: this.buildFiles, buildId: this.buildId }),
@@ -87,9 +90,10 @@ describe('CompileManager', function () {
execFile: sinon.stub().yields(),
}
this.CommandRunner = {
+ canRunSyncTeXInOutputDir: sinon.stub().returns(false),
promises: {
run: sinon.stub().callsFake((_1, _2, _3, _4, _5, _6, compileGroup) => {
- if (compileGroup === 'synctex') {
+ if (compileGroup === 'synctex' || compileGroup === 'synctex-output') {
return Promise.resolve({ stdout: this.commandOutput })
} else {
return Promise.resolve({
@@ -140,6 +144,12 @@ describe('CompileManager', function () {
.withArgs(Path.join(this.compileDir, 'output.synctex.gz'))
.resolves(this.fileStats)
+ this.CLSICacheHandler = {
+ notifyCLSICacheAboutBuild: sinon.stub(),
+ downloadLatestCompileCache: sinon.stub().resolves(),
+ downloadOutputDotSynctexFromCompileCache: sinon.stub().resolves(),
+ }
+
this.CompileManager = SandboxedModule.require(MODULE_PATH, {
requires: {
'./LatexRunner': this.LatexRunner,
@@ -160,6 +170,7 @@ describe('CompileManager', function () {
'./LockManager': this.LockManager,
'./SynctexOutputParser': this.SynctexOutputParser,
'fs/promises': this.fsPromises,
+ './CLSICacheHandler': this.CLSICacheHandler,
},
})
})
@@ -177,6 +188,11 @@ describe('CompileManager', function () {
flags: (this.flags = ['-file-line-error']),
compileGroup: (this.compileGroup = 'compile-group'),
stopOnFirstError: false,
+ metricsOpts: {
+ path: 'clsi-perf',
+ method: 'minimal',
+ compile: 'initial',
+ },
}
this.env = {
OVERLEAF_PROJECT_ID: this.projectId,
@@ -188,7 +204,7 @@ describe('CompileManager', function () {
const error = new Error('locked')
this.LockManager.acquire.throws(error)
await expect(
- this.CompileManager.promises.doCompileWithLock(this.request)
+ this.CompileManager.promises.doCompileWithLock(this.request, {}, {})
).to.be.rejectedWith(error)
})
@@ -206,7 +222,9 @@ describe('CompileManager', function () {
describe('normally', function () {
beforeEach(async function () {
this.result = await this.CompileManager.promises.doCompileWithLock(
- this.request
+ this.request,
+ {},
+ {}
)
})
@@ -260,7 +278,11 @@ describe('CompileManager', function () {
describe('with draft mode', function () {
beforeEach(async function () {
this.request.draft = true
- await this.CompileManager.promises.doCompileWithLock(this.request)
+ await this.CompileManager.promises.doCompileWithLock(
+ this.request,
+ {},
+ {}
+ )
})
it('should inject the draft mode header', function () {
@@ -273,7 +295,11 @@ describe('CompileManager', function () {
describe('with a check option', function () {
beforeEach(async function () {
this.request.check = 'error'
- await this.CompileManager.promises.doCompileWithLock(this.request)
+ await this.CompileManager.promises.doCompileWithLock(
+ this.request,
+ {},
+ {}
+ )
})
it('should run chktex', function () {
@@ -305,7 +331,11 @@ describe('CompileManager', function () {
beforeEach(async function () {
this.request.rootResourcePath = 'main.Rtex'
this.request.check = 'error'
- await this.CompileManager.promises.doCompileWithLock(this.request)
+ await this.CompileManager.promises.doCompileWithLock(
+ this.request,
+ {},
+ {}
+ )
})
it('should not run chktex', function () {
@@ -334,7 +364,7 @@ describe('CompileManager', function () {
error.timedout = true
this.LatexRunner.promises.runLatex.rejects(error)
await expect(
- this.CompileManager.promises.doCompileWithLock(this.request)
+ this.CompileManager.promises.doCompileWithLock(this.request, {}, {})
).to.be.rejected
})
@@ -357,7 +387,7 @@ describe('CompileManager', function () {
error.terminated = true
this.LatexRunner.promises.runLatex.rejects(error)
await expect(
- this.CompileManager.promises.doCompileWithLock(this.request)
+ this.CompileManager.promises.doCompileWithLock(this.request, {}, {})
).to.be.rejected
})
@@ -437,12 +467,83 @@ describe('CompileManager', function () {
this.compileDir,
this.Settings.clsi.docker.image,
60000,
- {}
+ {},
+ 'synctex'
)
})
it('should return the parsed output', function () {
- expect(this.result).to.deep.equal(this.records)
+ expect(this.result).to.deep.equal({
+ codePositions: this.records,
+ downloadedFromCache: false,
+ })
+ })
+ })
+
+ describe('from cache in docker', function () {
+ beforeEach(async function () {
+ this.CommandRunner.canRunSyncTeXInOutputDir.returns(true)
+ this.Settings.path.synctexBaseDir
+ .withArgs(`${this.projectId}-${this.userId}`)
+ .returns('/compile')
+
+ const errNotFound = new Error()
+ errNotFound.code = 'ENOENT'
+ this.outputDir = `${this.outputBaseDir}/${this.projectId}-${this.userId}/${this.OutputCacheManager.CACHE_SUBDIR}/${this.buildId}`
+ const filename = Path.join(this.outputDir, 'output.synctex.gz')
+ this.fsPromises.stat
+ .withArgs(this.outputDir)
+ .onFirstCall()
+ .rejects(errNotFound)
+ this.fsPromises.stat
+ .withArgs(this.outputDir)
+ .onSecondCall()
+ .resolves(this.dirStats)
+ this.fsPromises.stat.withArgs(filename).resolves(this.fileStats)
+ this.CLSICacheHandler.downloadOutputDotSynctexFromCompileCache.resolves(
+ true
+ )
+ this.result = await this.CompileManager.promises.syncFromCode(
+ this.projectId,
+ this.userId,
+ this.filename,
+ this.line,
+ this.column,
+ {
+ imageName: 'image',
+ editorId: '00000000-0000-0000-0000-000000000000',
+ buildId: this.buildId,
+ compileFromClsiCache: true,
+ }
+ )
+ })
+
+ it('should run in output dir', function () {
+ const outputFilePath = '/compile/output.pdf'
+ const inputFilePath = `/compile/${this.filename}`
+ expect(this.CommandRunner.promises.run).to.have.been.calledWith(
+ `${this.projectId}-${this.userId}`,
+ [
+ 'synctex',
+ 'view',
+ '-i',
+ `${this.line}:${this.column}:${inputFilePath}`,
+ '-o',
+ outputFilePath,
+ ],
+ this.outputDir,
+ 'image',
+ 60000,
+ {},
+ 'synctex-output'
+ )
+ })
+
+ it('should return the parsed output', function () {
+ expect(this.result).to.deep.equal({
+ codePositions: this.records,
+ downloadedFromCache: true,
+ })
})
})
@@ -455,7 +556,7 @@ describe('CompileManager', function () {
this.filename,
this.line,
this.column,
- customImageName
+ { imageName: customImageName }
)
})
@@ -475,7 +576,8 @@ describe('CompileManager', function () {
this.compileDir,
customImageName,
60000,
- {}
+ {},
+ 'synctex'
)
})
})
@@ -497,7 +599,7 @@ describe('CompileManager', function () {
this.page,
this.h,
this.v,
- ''
+ { imageName: '' }
)
})
@@ -519,7 +621,10 @@ describe('CompileManager', function () {
})
it('should return the parsed output', function () {
- expect(this.result).to.deep.equal(this.records)
+ expect(this.result).to.deep.equal({
+ pdfPositions: this.records,
+ downloadedFromCache: false,
+ })
})
})
@@ -532,7 +637,7 @@ describe('CompileManager', function () {
this.page,
this.h,
this.v,
- customImageName
+ { imageName: customImageName }
)
})
diff --git a/services/clsi/test/unit/js/DockerLockManagerTests.js b/services/clsi/test/unit/js/DockerLockManagerTests.js
index c494a85c66..f69179443c 100644
--- a/services/clsi/test/unit/js/DockerLockManagerTests.js
+++ b/services/clsi/test/unit/js/DockerLockManagerTests.js
@@ -16,7 +16,7 @@ const modulePath = require('node:path').join(
'../../../app/js/DockerLockManager'
)
-describe('LockManager', function () {
+describe('DockerLockManager', function () {
beforeEach(function () {
return (this.LockManager = SandboxedModule.require(modulePath, {
requires: {
diff --git a/services/clsi/test/unit/js/DockerRunnerTests.js b/services/clsi/test/unit/js/DockerRunnerTests.js
index 6c377d102b..d70aab52c7 100644
--- a/services/clsi/test/unit/js/DockerRunnerTests.js
+++ b/services/clsi/test/unit/js/DockerRunnerTests.js
@@ -76,8 +76,11 @@ describe('DockerRunner', function () {
this.env = {}
this.callback = sinon.stub()
this.project_id = 'project-id-123'
- this.volumes = { '/local/compile/directory': '/compile' }
+ this.volumes = { '/some/host/dir/compiles/directory': '/compile' }
this.Settings.clsi.docker.image = this.defaultImage = 'default-image'
+ this.Settings.path.sandboxedCompilesHostDirCompiles =
+ '/some/host/dir/compiles'
+ this.Settings.path.sandboxedCompilesHostDirOutput = '/some/host/dir/output'
this.compileGroup = 'compile-group'
return (this.Settings.clsi.docker.env = { PATH: 'mock-path' })
})
@@ -151,9 +154,8 @@ describe('DockerRunner', function () {
})
})
- describe('when path.sandboxedCompilesHostDir is set', function () {
+ describe('standard compile', function () {
beforeEach(function () {
- this.Settings.path.sandboxedCompilesHostDir = '/some/host/dir/compiles'
this.directory = '/var/lib/overleaf/data/compiles/xyz'
this.DockerRunner._runAndWaitForContainer = sinon
.stub()
@@ -183,6 +185,99 @@ describe('DockerRunner', function () {
})
})
+ describe('synctex-output', function () {
+ beforeEach(function () {
+ this.directory = '/var/lib/overleaf/data/output/xyz/generated-files/id'
+ this.DockerRunner._runAndWaitForContainer = sinon
+ .stub()
+ .callsArgWith(3, null, (this.output = 'mock-output'))
+ this.DockerRunner.run(
+ this.project_id,
+ this.command,
+ this.directory,
+ this.image,
+ this.timeout,
+ this.env,
+ 'synctex-output',
+ this.callback
+ )
+ })
+
+ it('should re-write the bind directory and set ro flag', function () {
+ const volumes =
+ this.DockerRunner._runAndWaitForContainer.lastCall.args[1]
+ expect(volumes).to.deep.equal({
+ '/some/host/dir/output/xyz/generated-files/id': '/compile:ro',
+ })
+ })
+
+ it('should call the callback', function () {
+ this.callback.calledWith(null, this.output).should.equal(true)
+ })
+ })
+
+ describe('synctex', function () {
+ beforeEach(function () {
+ this.directory = '/var/lib/overleaf/data/compile/xyz'
+ this.DockerRunner._runAndWaitForContainer = sinon
+ .stub()
+ .callsArgWith(3, null, (this.output = 'mock-output'))
+ this.DockerRunner.run(
+ this.project_id,
+ this.command,
+ this.directory,
+ this.image,
+ this.timeout,
+ this.env,
+ 'synctex',
+ this.callback
+ )
+ })
+
+ it('should re-write the bind directory', function () {
+ const volumes =
+ this.DockerRunner._runAndWaitForContainer.lastCall.args[1]
+ expect(volumes).to.deep.equal({
+ '/some/host/dir/compiles/xyz': '/compile:ro',
+ })
+ })
+
+ it('should call the callback', function () {
+ this.callback.calledWith(null, this.output).should.equal(true)
+ })
+ })
+
+ describe('wordcount', function () {
+ beforeEach(function () {
+ this.directory = '/var/lib/overleaf/data/compile/xyz'
+ this.DockerRunner._runAndWaitForContainer = sinon
+ .stub()
+ .callsArgWith(3, null, (this.output = 'mock-output'))
+ this.DockerRunner.run(
+ this.project_id,
+ this.command,
+ this.directory,
+ this.image,
+ this.timeout,
+ this.env,
+ 'wordcount',
+ this.callback
+ )
+ })
+
+ it('should re-write the bind directory', function () {
+ const volumes =
+ this.DockerRunner._runAndWaitForContainer.lastCall.args[1]
+ expect(volumes).to.deep.equal({
+ '/some/host/dir/compiles/xyz': '/compile:ro',
+ })
+ })
+
+ it('should call the callback', function () {
+ this.callback.calledWith(null, this.output).should.equal(true)
+ })
+ })
+
describe('when the run throws an error', function () {
beforeEach(function () {
let firstTime = true
@@ -390,7 +485,7 @@ describe('DockerRunner', function () {
const options =
this.DockerRunner._runAndWaitForContainer.lastCall.args[0]
return expect(options.HostConfig).to.deep.include({
- Binds: ['/local/compile/directory:/compile:rw'],
+ Binds: ['/some/host/dir/compiles/directory:/compile:rw'],
LogConfig: { Type: 'none', Config: {} },
CapDrop: 'ALL',
SecurityOpt: ['no-new-privileges'],
@@ -562,82 +657,6 @@ describe('DockerRunner', function () {
})
})
- describe('when a volume does not exist', function () {
- beforeEach(function () {
- this.fs.stat = sinon.stub().yields(new Error('no such path'))
- return this.DockerRunner.startContainer(
- this.options,
- this.volumes,
- this.attachStreamHandler,
- this.callback
- )
- })
-
- it('should not try to create the container', function () {
- return this.createContainer.called.should.equal(false)
- })
-
- it('should call the callback with an error', function () {
- this.callback.calledWith(sinon.match(Error)).should.equal(true)
- })
- })
-
- describe('when a volume exists but is not a directory', function () {
- beforeEach(function () {
- this.fs.stat = sinon.stub().yields(null, {
- isDirectory() {
- return false
- },
- })
- return this.DockerRunner.startContainer(
- this.options,
- this.volumes,
- this.attachStreamHandler,
- this.callback
- )
- })
-
- it('should not try to create the container', function () {
- return this.createContainer.called.should.equal(false)
- })
-
- it('should call the callback with an error', function () {
- this.callback.calledWith(sinon.match(Error)).should.equal(true)
- })
- })
-
- describe('when a volume does not exist, but sibling-containers are used', function () {
- beforeEach(function () {
- this.fs.stat = sinon.stub().yields(new Error('no such path'))
- this.Settings.path.sandboxedCompilesHostDir = '/some/path'
- this.container.start = sinon.stub().yields()
- return this.DockerRunner.startContainer(
- this.options,
- this.volumes,
- () => {},
- this.callback
- )
- })
-
- afterEach(function () {
- return delete this.Settings.path.sandboxedCompilesHostDir
- })
-
- it('should start the container with the given name', function () {
- this.getContainer.calledWith(this.options.name).should.equal(true)
- return this.container.start.called.should.equal(true)
- })
-
- it('should not try to create the container', function () {
- return this.createContainer.called.should.equal(false)
- })
-
- return it('should call the callback', function () {
- this.callback.called.should.equal(true)
- return this.callback.calledWith(new Error()).should.equal(false)
- })
- })
-
return describe('when the container tries to be created, but already has been (race condition)', function () {})
})
diff --git a/services/clsi/test/unit/js/LockManagerTests.js b/services/clsi/test/unit/js/LockManagerTests.js
index 64238ea0c6..7005b3e5a3 100644
--- a/services/clsi/test/unit/js/LockManagerTests.js
+++ b/services/clsi/test/unit/js/LockManagerTests.js
@@ -21,6 +21,7 @@ describe('LockManager', function () {
compileConcurrencyLimit: 5,
}),
'./Errors': (this.Erros = Errors),
+ './RequestParser': { MAX_TIMEOUT: 600 },
},
})
})
diff --git a/services/clsi/test/unit/js/ProjectPersistenceManagerTests.js b/services/clsi/test/unit/js/ProjectPersistenceManagerTests.js
index b36b9245e4..4f42411fba 100644
--- a/services/clsi/test/unit/js/ProjectPersistenceManagerTests.js
+++ b/services/clsi/test/unit/js/ProjectPersistenceManagerTests.js
@@ -21,11 +21,16 @@ const tk = require('timekeeper')
describe('ProjectPersistenceManager', function () {
beforeEach(function () {
+ this.fsPromises = {
+ statfs: sinon.stub(),
+ }
+
this.ProjectPersistenceManager = SandboxedModule.require(modulePath, {
requires: {
+ '@overleaf/metrics': (this.Metrics = { gauge: sinon.stub() }),
'./UrlCache': (this.UrlCache = {}),
'./CompileManager': (this.CompileManager = {}),
- diskusage: (this.diskusage = { check: sinon.stub() }),
+ fs: { promises: this.fsPromises },
'@overleaf/settings': (this.settings = {
project_cache_length_ms: 1000,
path: {
@@ -43,12 +48,17 @@ describe('ProjectPersistenceManager', function () {
describe('refreshExpiryTimeout', function () {
it('should leave expiry alone if plenty of disk', function (done) {
- this.diskusage.check.resolves({
- available: 40,
- total: 100,
+ this.fsPromises.statfs.resolves({
+ blocks: 100,
+ bsize: 1,
+ bavail: 40,
})
this.ProjectPersistenceManager.refreshExpiryTimeout(() => {
+ this.Metrics.gauge.should.have.been.calledWith(
+ 'disk_available_percent',
+ 40
+ )
this.ProjectPersistenceManager.EXPIRY_TIMEOUT.should.equal(
this.settings.project_cache_length_ms
)
@@ -57,31 +67,41 @@ describe('ProjectPersistenceManager', function () {
})
it('should drop EXPIRY_TIMEOUT 10% if low disk usage', function (done) {
- this.diskusage.check.resolves({
- available: 5,
- total: 100,
+ this.fsPromises.statfs.resolves({
+ blocks: 100,
+ bsize: 1,
+ bavail: 5,
})
this.ProjectPersistenceManager.refreshExpiryTimeout(() => {
+ this.Metrics.gauge.should.have.been.calledWith(
+ 'disk_available_percent',
+ 5
+ )
this.ProjectPersistenceManager.EXPIRY_TIMEOUT.should.equal(900)
done()
})
})
it('should not drop EXPIRY_TIMEOUT to below 50% of project_cache_length_ms', function (done) {
- this.diskusage.check.resolves({
- available: 5,
- total: 100,
+ this.fsPromises.statfs.resolves({
+ blocks: 100,
+ bsize: 1,
+ bavail: 5,
})
this.ProjectPersistenceManager.EXPIRY_TIMEOUT = 500
this.ProjectPersistenceManager.refreshExpiryTimeout(() => {
+ this.Metrics.gauge.should.have.been.calledWith(
+ 'disk_available_percent',
+ 5
+ )
this.ProjectPersistenceManager.EXPIRY_TIMEOUT.should.equal(500)
done()
})
})
it('should not modify EXPIRY_TIMEOUT if there is an error getting disk values', function (done) {
- this.diskusage.check.throws(new Error())
+ this.fsPromises.statfs.rejects(new Error())
this.ProjectPersistenceManager.refreshExpiryTimeout(() => {
this.ProjectPersistenceManager.EXPIRY_TIMEOUT.should.equal(1000)
done()
diff --git a/services/clsi/test/unit/js/RequestParserTests.js b/services/clsi/test/unit/js/RequestParserTests.js
index 5fdae7ed3d..437c3c4fbe 100644
--- a/services/clsi/test/unit/js/RequestParserTests.js
+++ b/services/clsi/test/unit/js/RequestParserTests.js
@@ -30,6 +30,7 @@ describe('RequestParser', function () {
this.RequestParser = SandboxedModule.require(modulePath, {
requires: {
'@overleaf/settings': (this.settings = {}),
+ './OutputCacheManager': { BUILD_REGEX: /^[0-9a-f]+-[0-9a-f]+$/ },
},
})
})
@@ -274,6 +275,37 @@ describe('RequestParser', function () {
})
})
+ describe('with a valid buildId', function () {
+ beforeEach(function (done) {
+ this.validRequest.compile.options.buildId = '195a4869176-a4ad60bee7bf35e4'
+ this.RequestParser.parse(this.validRequest, (error, data) => {
+ if (error) return done(error)
+ this.data = data
+ done()
+ })
+ })
+
+ it('should accept the buildId', function () {
+ this.data.buildId.should.equal('195a4869176-a4ad60bee7bf35e4')
+ })
+ })
+
+ describe('with a bad buildId', function () {
+ beforeEach(function () {
+ this.validRequest.compile.options.buildId = 'foo/bar'
+ this.RequestParser.parse(this.validRequest, this.callback)
+ })
+
+ it('should return an error', function () {
+ this.callback
+ .calledWithMatch({
+ message:
+ 'buildId attribute does not match regex /^[0-9a-f]+-[0-9a-f]+$/',
+ })
+ .should.equal(true)
+ })
+ })
+
describe('with a resource with a valid date', function () {
beforeEach(function () {
this.date = '12:00 01/02/03'
diff --git a/services/contacts/.nvmrc b/services/contacts/.nvmrc
index 2a393af592..fc37597bcc 100644
--- a/services/contacts/.nvmrc
+++ b/services/contacts/.nvmrc
@@ -1 +1 @@
-20.18.0
+22.17.0
diff --git a/services/contacts/Dockerfile b/services/contacts/Dockerfile
index 56aad9622d..b59cada7b3 100644
--- a/services/contacts/Dockerfile
+++ b/services/contacts/Dockerfile
@@ -2,7 +2,7 @@
# Instead run bin/update_build_scripts from
# https://github.com/overleaf/internal/
-FROM node:20.18.0 AS base
+FROM node:22.17.0 AS base
WORKDIR /overleaf/services/contacts
diff --git a/services/contacts/Makefile b/services/contacts/Makefile
index 5808e5bc35..3309e298e8 100644
--- a/services/contacts/Makefile
+++ b/services/contacts/Makefile
@@ -32,12 +32,30 @@ HERE=$(shell pwd)
MONOREPO=$(shell cd ../../ && pwd)
# Run the linting commands in the scope of the monorepo.
# Eslint and prettier (plus some configs) are on the root.
-RUN_LINTING = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(HERE) node:20.18.0 npm run --silent
+RUN_LINTING = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(HERE) node:22.17.0 npm run --silent
RUN_LINTING_CI = docker run --rm --volume $(MONOREPO)/.editorconfig:/overleaf/.editorconfig --volume $(MONOREPO)/.eslintignore:/overleaf/.eslintignore --volume $(MONOREPO)/.eslintrc:/overleaf/.eslintrc --volume $(MONOREPO)/.prettierignore:/overleaf/.prettierignore --volume $(MONOREPO)/.prettierrc:/overleaf/.prettierrc --volume $(MONOREPO)/tsconfig.backend.json:/overleaf/tsconfig.backend.json ci/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER) npm run --silent
# Same but from the top of the monorepo
-RUN_LINTING_MONOREPO = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(MONOREPO) node:20.18.0 npm run --silent
+RUN_LINTING_MONOREPO = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(MONOREPO) node:22.17.0 npm run --silent
+
+SHELLCHECK_OPTS = \
+ --shell=bash \
+ --external-sources
+SHELLCHECK_COLOR := $(if $(CI),--color=never,--color)
+SHELLCHECK_FILES := { git ls-files "*.sh" -z; git grep -Plz "\A\#\!.*bash"; } | sort -zu
+
+shellcheck:
+ @$(SHELLCHECK_FILES) | xargs -0 -r docker run --rm -v $(HERE):/mnt -w /mnt \
+ koalaman/shellcheck:stable $(SHELLCHECK_OPTS) $(SHELLCHECK_COLOR)
+
+shellcheck_fix:
+ @$(SHELLCHECK_FILES) | while IFS= read -r -d '' file; do \
+ diff=$$(docker run --rm -v $(HERE):/mnt -w /mnt koalaman/shellcheck:stable $(SHELLCHECK_OPTS) --format=diff "$$file" 2>/dev/null); \
+ if [ -n "$$diff" ] && ! echo "$$diff" | patch -p1 >/dev/null 2>&1; then echo "\033[31m$$file\033[0m"; \
+ elif [ -n "$$diff" ]; then echo "$$file"; \
+ else echo "\033[2m$$file\033[0m"; fi \
+ done
format:
$(RUN_LINTING) format
@@ -63,7 +81,7 @@ typecheck:
typecheck_ci:
$(RUN_LINTING_CI) types:check
-test: format lint typecheck test_unit test_acceptance
+test: format lint typecheck shellcheck test_unit test_acceptance
test_unit:
ifneq (,$(wildcard test/unit))
@@ -98,13 +116,6 @@ test_acceptance_clean:
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) down -v -t 0
test_acceptance_pre_run:
- $(DOCKER_COMPOSE_TEST_ACCEPTANCE) up -d mongo
- $(DOCKER_COMPOSE_TEST_ACCEPTANCE) exec -T mongo sh -c ' \
- while ! mongosh --eval "db.version()" > /dev/null; do \
- echo "Waiting for Mongo..."; \
- sleep 1; \
- done; \
- mongosh --eval "rs.initiate({ _id: \"overleaf\", members: [ { _id: 0, host: \"mongo:27017\" } ] })"'
ifneq (,$(wildcard test/acceptance/js/scripts/pre-run))
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) run --rm test_acceptance test/acceptance/js/scripts/pre-run
endif
@@ -137,6 +148,7 @@ publish:
lint lint_fix \
build_types typecheck \
lint_ci format_ci typecheck_ci \
+ shellcheck shellcheck_fix \
test test_clean test_unit test_unit_clean \
test_acceptance test_acceptance_debug test_acceptance_pre_run \
test_acceptance_run test_acceptance_run_debug test_acceptance_clean \
diff --git a/services/contacts/buildscript.txt b/services/contacts/buildscript.txt
index 23c00d7a3e..b20764246c 100644
--- a/services/contacts/buildscript.txt
+++ b/services/contacts/buildscript.txt
@@ -4,6 +4,6 @@ contacts
--env-add=
--env-pass-through=
--esmock-loader=True
---node-version=20.18.0
+--node-version=22.17.0
--public-repo=False
---script-version=4.5.0
+--script-version=4.7.0
diff --git a/services/contacts/docker-compose.ci.yml b/services/contacts/docker-compose.ci.yml
index 6f1a608534..ca3303a079 100644
--- a/services/contacts/docker-compose.ci.yml
+++ b/services/contacts/docker-compose.ci.yml
@@ -24,10 +24,13 @@ services:
MOCHA_GREP: ${MOCHA_GREP}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
+ volumes:
+ - ../../bin/shared/wait_for_it:/overleaf/bin/shared/wait_for_it
depends_on:
mongo:
- condition: service_healthy
+ condition: service_started
user: node
+ entrypoint: /overleaf/bin/shared/wait_for_it mongo:27017 --timeout=0 --
command: npm run test:acceptance
@@ -39,9 +42,14 @@ services:
command: tar -czf /tmp/build/build.tar.gz --exclude=build.tar.gz --exclude-vcs .
user: root
mongo:
- image: mongo:6.0.13
+ image: mongo:8.0.11
command: --replSet overleaf
- healthcheck:
- test: "mongosh --quiet localhost/test --eval 'quit(db.runCommand({ ping: 1 }).ok ? 0 : 1)'"
- interval: 1s
- retries: 20
+ volumes:
+ - ../../bin/shared/mongodb-init-replica-set.js:/docker-entrypoint-initdb.d/mongodb-init-replica-set.js
+ environment:
+ MONGO_INITDB_DATABASE: sharelatex
+ extra_hosts:
+ # Required when using the automatic database setup for initializing the
+ # replica set. This override is not needed when running the setup after
+ # starting up mongo.
+ - mongo:127.0.0.1
diff --git a/services/contacts/docker-compose.yml b/services/contacts/docker-compose.yml
index fe52658ba6..474ea224f8 100644
--- a/services/contacts/docker-compose.yml
+++ b/services/contacts/docker-compose.yml
@@ -6,7 +6,7 @@ version: "2.3"
services:
test_unit:
- image: node:20.18.0
+ image: node:22.17.0
volumes:
- .:/overleaf/services/contacts
- ../../node_modules:/overleaf/node_modules
@@ -14,37 +14,45 @@ services:
working_dir: /overleaf/services/contacts
environment:
MOCHA_GREP: ${MOCHA_GREP}
+ LOG_LEVEL: ${LOG_LEVEL:-}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
command: npm run --silent test:unit
user: node
test_acceptance:
- image: node:20.18.0
+ image: node:22.17.0
volumes:
- .:/overleaf/services/contacts
- ../../node_modules:/overleaf/node_modules
- ../../libraries:/overleaf/libraries
+ - ../../bin/shared/wait_for_it:/overleaf/bin/shared/wait_for_it
working_dir: /overleaf/services/contacts
environment:
ELASTIC_SEARCH_DSN: es:9200
MONGO_HOST: mongo
POSTGRES_HOST: postgres
MOCHA_GREP: ${MOCHA_GREP}
- LOG_LEVEL: ERROR
+ LOG_LEVEL: ${LOG_LEVEL:-}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
user: node
depends_on:
mongo:
- condition: service_healthy
+ condition: service_started
+ entrypoint: /overleaf/bin/shared/wait_for_it mongo:27017 --timeout=0 --
command: npm run --silent test:acceptance
mongo:
- image: mongo:6.0.13
+ image: mongo:8.0.11
command: --replSet overleaf
- healthcheck:
- test: "mongosh --quiet localhost/test --eval 'quit(db.runCommand({ ping: 1 }).ok ? 0 : 1)'"
- interval: 1s
- retries: 20
+ volumes:
+ - ../../bin/shared/mongodb-init-replica-set.js:/docker-entrypoint-initdb.d/mongodb-init-replica-set.js
+ environment:
+ MONGO_INITDB_DATABASE: sharelatex
+ extra_hosts:
+ # Required when using the automatic database setup for initializing the
+ # replica set. This override is not needed when running the setup after
+ # starting up mongo.
+ - mongo:127.0.0.1
diff --git a/services/contacts/package.json b/services/contacts/package.json
index 5b30175c66..db707e55c9 100644
--- a/services/contacts/package.json
+++ b/services/contacts/package.json
@@ -6,9 +6,9 @@
"main": "app.js",
"scripts": {
"start": "node app.js",
- "test:acceptance:_run": "LOG_LEVEL=fatal mocha --loader=esmock --recursive --reporter spec --timeout 15000 --exit $@ test/acceptance/js",
+ "test:acceptance:_run": "mocha --loader=esmock --recursive --reporter spec --timeout 15000 --exit $@ test/acceptance/js",
"test:acceptance": "npm run test:acceptance:_run -- --grep=$MOCHA_GREP",
- "test:unit:_run": "LOG_LEVEL=fatal mocha --loader=esmock --recursive --reporter spec $@ test/unit/js",
+ "test:unit:_run": "mocha --loader=esmock --recursive --reporter spec $@ test/unit/js",
"test:unit": "npm run test:unit:_run -- --grep=$MOCHA_GREP",
"nodemon": "node --watch app.js",
"lint": "eslint --max-warnings 0 --format unix .",
@@ -24,8 +24,8 @@
"async": "^3.2.5",
"body-parser": "^1.20.3",
"bunyan": "^1.8.15",
- "express": "^4.21.0",
- "mongodb": "6.10.0",
+ "express": "^4.21.2",
+ "mongodb": "6.12.0",
"request": "~2.88.2",
"underscore": "~1.13.1"
},
@@ -33,7 +33,7 @@
"chai": "^4.3.6",
"chai-as-promised": "^7.1.1",
"esmock": "^2.6.3",
- "mocha": "^10.2.0",
+ "mocha": "^11.1.0",
"sinon": "~9.0.1",
"sinon-chai": "^3.7.0",
"typescript": "^5.0.4"
diff --git a/services/docstore/.gitignore b/services/docstore/.gitignore
deleted file mode 100644
index 84bf300f7f..0000000000
--- a/services/docstore/.gitignore
+++ /dev/null
@@ -1,8 +0,0 @@
-node_modules
-forever
-
-# managed by dev-environment$ bin/update_build_scripts
-.npmrc
-
-# Jetbrains IDEs
-.idea
diff --git a/services/docstore/.nvmrc b/services/docstore/.nvmrc
index 2a393af592..fc37597bcc 100644
--- a/services/docstore/.nvmrc
+++ b/services/docstore/.nvmrc
@@ -1 +1 @@
-20.18.0
+22.17.0
diff --git a/services/docstore/Dockerfile b/services/docstore/Dockerfile
index 68298d27d1..f24f9ddaf7 100644
--- a/services/docstore/Dockerfile
+++ b/services/docstore/Dockerfile
@@ -2,7 +2,7 @@
# Instead run bin/update_build_scripts from
# https://github.com/overleaf/internal/
-FROM node:20.18.0 AS base
+FROM node:22.17.0 AS base
WORKDIR /overleaf/services/docstore
diff --git a/services/docstore/Makefile b/services/docstore/Makefile
index d96ab18280..2b3596b0b4 100644
--- a/services/docstore/Makefile
+++ b/services/docstore/Makefile
@@ -32,12 +32,30 @@ HERE=$(shell pwd)
MONOREPO=$(shell cd ../../ && pwd)
# Run the linting commands in the scope of the monorepo.
# Eslint and prettier (plus some configs) are on the root.
-RUN_LINTING = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(HERE) node:20.18.0 npm run --silent
+RUN_LINTING = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(HERE) node:22.17.0 npm run --silent
RUN_LINTING_CI = docker run --rm --volume $(MONOREPO)/.editorconfig:/overleaf/.editorconfig --volume $(MONOREPO)/.eslintignore:/overleaf/.eslintignore --volume $(MONOREPO)/.eslintrc:/overleaf/.eslintrc --volume $(MONOREPO)/.prettierignore:/overleaf/.prettierignore --volume $(MONOREPO)/.prettierrc:/overleaf/.prettierrc --volume $(MONOREPO)/tsconfig.backend.json:/overleaf/tsconfig.backend.json ci/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER) npm run --silent
# Same but from the top of the monorepo
-RUN_LINTING_MONOREPO = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(MONOREPO) node:20.18.0 npm run --silent
+RUN_LINTING_MONOREPO = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(MONOREPO) node:22.17.0 npm run --silent
+
+SHELLCHECK_OPTS = \
+ --shell=bash \
+ --external-sources
+SHELLCHECK_COLOR := $(if $(CI),--color=never,--color)
+SHELLCHECK_FILES := { git ls-files "*.sh" -z; git grep -Plz "\A\#\!.*bash"; } | sort -zu
+
+shellcheck:
+ @$(SHELLCHECK_FILES) | xargs -0 -r docker run --rm -v $(HERE):/mnt -w /mnt \
+ koalaman/shellcheck:stable $(SHELLCHECK_OPTS) $(SHELLCHECK_COLOR)
+
+shellcheck_fix:
+ @$(SHELLCHECK_FILES) | while IFS= read -r -d '' file; do \
+ diff=$$(docker run --rm -v $(HERE):/mnt -w /mnt koalaman/shellcheck:stable $(SHELLCHECK_OPTS) --format=diff "$$file" 2>/dev/null); \
+ if [ -n "$$diff" ] && ! echo "$$diff" | patch -p1 >/dev/null 2>&1; then echo "\033[31m$$file\033[0m"; \
+ elif [ -n "$$diff" ]; then echo "$$file"; \
+ else echo "\033[2m$$file\033[0m"; fi \
+ done
format:
$(RUN_LINTING) format
@@ -63,7 +81,7 @@ typecheck:
typecheck_ci:
$(RUN_LINTING_CI) types:check
-test: format lint typecheck test_unit test_acceptance
+test: format lint typecheck shellcheck test_unit test_acceptance
test_unit:
ifneq (,$(wildcard test/unit))
@@ -98,13 +116,6 @@ test_acceptance_clean:
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) down -v -t 0
test_acceptance_pre_run:
- $(DOCKER_COMPOSE_TEST_ACCEPTANCE) up -d mongo
- $(DOCKER_COMPOSE_TEST_ACCEPTANCE) exec -T mongo sh -c ' \
- while ! mongosh --eval "db.version()" > /dev/null; do \
- echo "Waiting for Mongo..."; \
- sleep 1; \
- done; \
- mongosh --eval "rs.initiate({ _id: \"overleaf\", members: [ { _id: 0, host: \"mongo:27017\" } ] })"'
ifneq (,$(wildcard test/acceptance/js/scripts/pre-run))
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) run --rm test_acceptance test/acceptance/js/scripts/pre-run
endif
@@ -137,6 +148,7 @@ publish:
lint lint_fix \
build_types typecheck \
lint_ci format_ci typecheck_ci \
+ shellcheck shellcheck_fix \
test test_clean test_unit test_unit_clean \
test_acceptance test_acceptance_debug test_acceptance_pre_run \
test_acceptance_run test_acceptance_run_debug test_acceptance_clean \
diff --git a/services/docstore/app.js b/services/docstore/app.js
index b4a26fc24a..ef755c4bb1 100644
--- a/services/docstore/app.js
+++ b/services/docstore/app.js
@@ -50,6 +50,14 @@ app.param('doc_id', function (req, res, next, docId) {
app.get('/project/:project_id/doc-deleted', HttpController.getAllDeletedDocs)
app.get('/project/:project_id/doc', HttpController.getAllDocs)
app.get('/project/:project_id/ranges', HttpController.getAllRanges)
+app.get(
+ '/project/:project_id/comment-thread-ids',
+ HttpController.getCommentThreadIds
+)
+app.get(
+ '/project/:project_id/tracked-changes-user-ids',
+ HttpController.getTrackedChangesUserIds
+)
app.get('/project/:project_id/has-ranges', HttpController.projectHasRanges)
app.get('/project/:project_id/doc/:doc_id', HttpController.getDoc)
app.get('/project/:project_id/doc/:doc_id/deleted', HttpController.isDocDeleted)
@@ -88,14 +96,17 @@ app.get('/status', (req, res) => res.send('docstore is alive'))
app.use(handleValidationErrors())
app.use(function (error, req, res, next) {
- logger.error({ err: error, req }, 'request errored')
if (error instanceof Errors.NotFoundError) {
+ logger.warn({ req }, 'not found')
res.sendStatus(404)
} else if (error instanceof Errors.DocModifiedError) {
+ logger.warn({ req }, 'conflict: doc modified')
res.sendStatus(409)
} else if (error instanceof Errors.DocVersionDecrementedError) {
+ logger.warn({ req }, 'conflict: doc version decremented')
res.sendStatus(409)
} else {
+ logger.error({ err: error, req }, 'request errored')
res.status(500).send('Oops, something went wrong')
}
})
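
For reference, both new routes are internal JSON endpoints keyed by project id. A minimal sketch of how another service might call them (the docstore base URL below is an assumption for illustration, not something defined in this patch):

// Sketch only: query the two new docstore endpoints added above.
// DOCSTORE_URL is a placeholder; real callers read the host/port from settings.
const DOCSTORE_URL = process.env.DOCSTORE_URL || 'http://127.0.0.1:3016'

async function inspectProjectRanges(projectId) {
  // Maps docId -> [threadId, ...] for docs that contain comments
  const threadIdsByDoc = await fetch(
    `${DOCSTORE_URL}/project/${projectId}/comment-thread-ids`
  ).then(res => res.json())

  // Flat, de-duplicated list of user ids that have tracked changes
  const trackedChangesUserIds = await fetch(
    `${DOCSTORE_URL}/project/${projectId}/tracked-changes-user-ids`
  ).then(res => res.json())

  return { threadIdsByDoc, trackedChangesUserIds }
}

// Usage: inspectProjectRanges('507f1f77bcf86cd799439011').then(console.log)
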
diff --git a/services/docstore/app/js/DocArchiveManager.js b/services/docstore/app/js/DocArchiveManager.js
index 238672e711..d03ee161a8 100644
--- a/services/docstore/app/js/DocArchiveManager.js
+++ b/services/docstore/app/js/DocArchiveManager.js
@@ -1,5 +1,4 @@
-const { callbackify } = require('node:util')
-const MongoManager = require('./MongoManager').promises
+const MongoManager = require('./MongoManager')
const Errors = require('./Errors')
const logger = require('@overleaf/logger')
const Settings = require('@overleaf/settings')
@@ -8,28 +7,12 @@ const { ReadableString } = require('@overleaf/stream-utils')
const RangeManager = require('./RangeManager')
const PersistorManager = require('./PersistorManager')
const pMap = require('p-map')
+const { streamToBuffer } = require('./StreamToBuffer')
const { BSON } = require('mongodb-legacy')
const PARALLEL_JOBS = Settings.parallelArchiveJobs
const UN_ARCHIVE_BATCH_SIZE = Settings.unArchiveBatchSize
-module.exports = {
- archiveAllDocs: callbackify(archiveAllDocs),
- archiveDoc: callbackify(archiveDoc),
- unArchiveAllDocs: callbackify(unArchiveAllDocs),
- unarchiveDoc: callbackify(unarchiveDoc),
- destroyProject: callbackify(destroyProject),
- getDoc: callbackify(getDoc),
- promises: {
- archiveAllDocs,
- archiveDoc,
- unArchiveAllDocs,
- unarchiveDoc,
- destroyProject,
- getDoc,
- },
-}
-
async function archiveAllDocs(projectId) {
if (!_isArchivingEnabled()) {
return
@@ -61,6 +44,8 @@ async function archiveDoc(projectId, docId) {
throw new Error('doc has no lines')
}
+ RangeManager.fixCommentIds(doc)
+
// warn about any oversized docs already in mongo
const linesSize = BSON.calculateObjectSize(doc.lines || {})
const rangesSize = BSON.calculateObjectSize(doc.ranges || {})
@@ -136,7 +121,7 @@ async function getDoc(projectId, docId) {
key
)
stream.resume()
- const buffer = await _streamToBuffer(projectId, docId, stream)
+ const buffer = await streamToBuffer(projectId, docId, stream)
const md5 = crypto.createHash('md5').update(buffer).digest('hex')
if (sourceMd5 !== md5) {
throw new Errors.Md5MismatchError('md5 mismatch when downloading doc', {
@@ -187,34 +172,6 @@ async function destroyProject(projectId) {
await Promise.all(tasks)
}
-async function _streamToBuffer(projectId, docId, stream) {
- const chunks = []
- let size = 0
- let logged = false
- const logIfTooLarge = finishedReading => {
- if (size <= Settings.max_doc_length) return
- // Log progress once and then again at the end.
- if (logged && !finishedReading) return
- logger.warn(
- { projectId, docId, size, finishedReading },
- 'potentially large doc pulled down from gcs'
- )
- logged = true
- }
- return await new Promise((resolve, reject) => {
- stream.on('data', chunk => {
- size += chunk.byteLength
- logIfTooLarge(false)
- chunks.push(chunk)
- })
- stream.on('error', reject)
- stream.on('end', () => {
- logIfTooLarge(true)
- resolve(Buffer.concat(chunks))
- })
- })
-}
-
function _deserializeArchivedDoc(buffer) {
const doc = JSON.parse(buffer)
@@ -252,3 +209,12 @@ function _isArchivingEnabled() {
return true
}
+
+module.exports = {
+ archiveAllDocs,
+ archiveDoc,
+ unArchiveAllDocs,
+ unarchiveDoc,
+ destroyProject,
+ getDoc,
+}
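
With the callbackified exports gone, DocArchiveManager now exposes only async functions, so existing callers drop the `.promises` namespace. A rough migration sketch (the caller code is illustrative, not part of this patch):

const DocArchiveManager = require('./DocArchiveManager')

async function flushDocToS3(projectId, docId) {
  // Before this change: await DocArchiveManager.promises.archiveDoc(projectId, docId)
  await DocArchiveManager.archiveDoc(projectId, docId)
}
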
diff --git a/services/docstore/app/js/DocManager.js b/services/docstore/app/js/DocManager.js
index a9ed99425c..c9e8dadc2c 100644
--- a/services/docstore/app/js/DocManager.js
+++ b/services/docstore/app/js/DocManager.js
@@ -5,7 +5,6 @@ const _ = require('lodash')
const DocArchive = require('./DocArchiveManager')
const RangeManager = require('./RangeManager')
const Settings = require('@overleaf/settings')
-const { callbackifyAll } = require('@overleaf/promise-utils')
const { setTimeout } = require('node:timers/promises')
/**
@@ -29,7 +28,7 @@ const DocManager = {
throw new Error('must include inS3 when getting doc')
}
- const doc = await MongoManager.promises.findDoc(projectId, docId, filter)
+ const doc = await MongoManager.findDoc(projectId, docId, filter)
if (doc == null) {
throw new Errors.NotFoundError(
@@ -38,15 +37,19 @@ const DocManager = {
}
if (doc.inS3) {
- await DocArchive.promises.unarchiveDoc(projectId, docId)
+ await DocArchive.unarchiveDoc(projectId, docId)
return await DocManager._getDoc(projectId, docId, filter)
}
+ if (filter.ranges) {
+ RangeManager.fixCommentIds(doc)
+ }
+
return doc
},
async isDocDeleted(projectId, docId) {
- const doc = await MongoManager.promises.findDoc(projectId, docId, {
+ const doc = await MongoManager.findDoc(projectId, docId, {
deleted: true,
})
@@ -74,7 +77,7 @@ const DocManager = {
// returns the doc without any version information
async _peekRawDoc(projectId, docId) {
- const doc = await MongoManager.promises.findDoc(projectId, docId, {
+ const doc = await MongoManager.findDoc(projectId, docId, {
lines: true,
rev: true,
deleted: true,
@@ -91,7 +94,7 @@ const DocManager = {
if (doc.inS3) {
// skip the unarchiving to mongo when getting a doc
- const archivedDoc = await DocArchive.promises.getDoc(projectId, docId)
+ const archivedDoc = await DocArchive.getDoc(projectId, docId)
Object.assign(doc, archivedDoc)
}
@@ -102,7 +105,7 @@ const DocManager = {
// without unarchiving it (avoids unnecessary writes to mongo)
async peekDoc(projectId, docId) {
const doc = await DocManager._peekRawDoc(projectId, docId)
- await MongoManager.promises.checkRevUnchanged(doc)
+ await MongoManager.checkRevUnchanged(doc)
return doc
},
@@ -111,16 +114,18 @@ const DocManager = {
lines: true,
inS3: true,
})
- return doc
+ if (!doc) throw new Errors.NotFoundError()
+ if (!Array.isArray(doc.lines)) throw new Errors.DocWithoutLinesError()
+ return doc.lines.join('\n')
},
async getAllDeletedDocs(projectId, filter) {
- return await MongoManager.promises.getProjectsDeletedDocs(projectId, filter)
+ return await MongoManager.getProjectsDeletedDocs(projectId, filter)
},
async getAllNonDeletedDocs(projectId, filter) {
- await DocArchive.promises.unArchiveAllDocs(projectId)
- const docs = await MongoManager.promises.getProjectsDocs(
+ await DocArchive.unArchiveAllDocs(projectId)
+ const docs = await MongoManager.getProjectsDocs(
projectId,
{ include_deleted: false },
filter
@@ -128,15 +133,46 @@ const DocManager = {
if (docs == null) {
throw new Errors.NotFoundError(`No docs for project ${projectId}`)
}
+ if (filter.ranges) {
+ for (const doc of docs) {
+ RangeManager.fixCommentIds(doc)
+ }
+ }
return docs
},
+ async getCommentThreadIds(projectId) {
+ const docs = await DocManager.getAllNonDeletedDocs(projectId, {
+ _id: true,
+ ranges: true,
+ })
+ const byDoc = new Map()
+ for (const doc of docs) {
+ const ids = new Set()
+ for (const comment of doc.ranges?.comments || []) {
+ ids.add(comment.op.t)
+ }
+ if (ids.size > 0) byDoc.set(doc._id.toString(), Array.from(ids))
+ }
+ return Object.fromEntries(byDoc.entries())
+ },
+
+ async getTrackedChangesUserIds(projectId) {
+ const docs = await DocManager.getAllNonDeletedDocs(projectId, {
+ ranges: true,
+ })
+ const userIds = new Set()
+ for (const doc of docs) {
+ for (const change of doc.ranges?.changes || []) {
+ if (change.metadata.user_id === 'anonymous-user') continue
+ userIds.add(change.metadata.user_id)
+ }
+ }
+ return Array.from(userIds)
+ },
+
async projectHasRanges(projectId) {
- const docs = await MongoManager.promises.getProjectsDocs(
- projectId,
- {},
- { _id: 1 }
- )
+ const docs = await MongoManager.getProjectsDocs(projectId, {}, { _id: 1 })
const docIds = docs.map(doc => doc._id)
for (const docId of docIds) {
const doc = await DocManager.peekDoc(projectId, docId)
@@ -247,7 +283,7 @@ const DocManager = {
}
modified = true
- await MongoManager.promises.upsertIntoDocCollection(
+ await MongoManager.upsertIntoDocCollection(
projectId,
docId,
doc?.rev,
@@ -262,11 +298,7 @@ const DocManager = {
async patchDoc(projectId, docId, meta) {
const projection = { _id: 1, deleted: true }
- const doc = await MongoManager.promises.findDoc(
- projectId,
- docId,
- projection
- )
+ const doc = await MongoManager.findDoc(projectId, docId, projection)
if (!doc) {
throw new Errors.NotFoundError(
`No such project/doc to delete: ${projectId}/${docId}`
@@ -275,7 +307,7 @@ const DocManager = {
if (meta.deleted && Settings.docstore.archiveOnSoftDelete) {
// The user will not read this doc anytime soon. Flush it out of mongo.
- DocArchive.promises.archiveDoc(projectId, docId).catch(err => {
+ DocArchive.archiveDoc(projectId, docId).catch(err => {
logger.warn(
{ projectId, docId, err },
'archiving a single doc in the background failed'
@@ -283,15 +315,8 @@ const DocManager = {
})
}
- await MongoManager.promises.patchDoc(projectId, docId, meta)
+ await MongoManager.patchDoc(projectId, docId, meta)
},
}
-module.exports = {
- ...callbackifyAll(DocManager, {
- multiResult: {
- updateDoc: ['modified', 'rev'],
- },
- }),
- promises: DocManager,
-}
+module.exports = DocManager
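
The two new DocManager methods aggregate per-project range data; a small sketch of the shapes they resolve with (the example ids are made up):

const DocManager = require('./DocManager')

async function summariseRanges(projectId) {
  // e.g. { '64d0...01': ['64d0...aa'], '64d0...02': ['64d0...bb', '64d0...cc'] }
  // Docs without comments are omitted from the map.
  const threadIdsByDoc = await DocManager.getCommentThreadIds(projectId)

  // e.g. ['64d0...11', '64d0...22']; 'anonymous-user' entries are filtered out.
  const userIds = await DocManager.getTrackedChangesUserIds(projectId)

  return { threadIdsByDoc, userIds }
}
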
diff --git a/services/docstore/app/js/Errors.js b/services/docstore/app/js/Errors.js
index bbdbe75c08..7b150cc0db 100644
--- a/services/docstore/app/js/Errors.js
+++ b/services/docstore/app/js/Errors.js
@@ -10,10 +10,13 @@ class DocRevValueError extends OError {}
class DocVersionDecrementedError extends OError {}
+class DocWithoutLinesError extends OError {}
+
module.exports = {
Md5MismatchError,
DocModifiedError,
DocRevValueError,
DocVersionDecrementedError,
+ DocWithoutLinesError,
...Errors,
}
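
DocWithoutLinesError is thrown by the reworked DocManager.getDocLines when a doc exists but has no lines array. A caller that wants to map it to something other than a 500 would catch it explicitly; this is only a sketch, not behaviour added by this patch:

const Errors = require('./Errors')
const DocManager = require('./DocManager')

async function tryGetDocLines(projectId, docId) {
  try {
    return await DocManager.getDocLines(projectId, docId)
  } catch (err) {
    if (err instanceof Errors.DocWithoutLinesError) {
      return null // treat a lines-less doc as "no content" instead of failing
    }
    throw err
  }
}
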
diff --git a/services/docstore/app/js/HealthChecker.js b/services/docstore/app/js/HealthChecker.js
index 34cd5c973c..a5b7ad7e9a 100644
--- a/services/docstore/app/js/HealthChecker.js
+++ b/services/docstore/app/js/HealthChecker.js
@@ -1,67 +1,35 @@
-// TODO: This file was created by bulk-decaffeinate.
-// Fix any style issues and re-enable lint.
-/*
- * decaffeinate suggestions:
- * DS102: Remove unnecessary code created because of implicit returns
- * DS207: Consider shorter variations of null checks
- * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
- */
const { db, ObjectId } = require('./mongodb')
-const request = require('request')
-const async = require('async')
const _ = require('lodash')
const crypto = require('node:crypto')
const settings = require('@overleaf/settings')
const { port } = settings.internal.docstore
const logger = require('@overleaf/logger')
+const { fetchNothing, fetchJson } = require('@overleaf/fetch-utils')
-module.exports = {
- check(callback) {
- const docId = new ObjectId()
- const projectId = new ObjectId(settings.docstore.healthCheck.project_id)
- const url = `http://127.0.0.1:${port}/project/${projectId}/doc/${docId}`
- const lines = [
- 'smoke test - delete me',
- `${crypto.randomBytes(32).toString('hex')}`,
- ]
- const getOpts = () => ({
- url,
- timeout: 3000,
+async function check() {
+ const docId = new ObjectId()
+ const projectId = new ObjectId(settings.docstore.healthCheck.project_id)
+ const url = `http://127.0.0.1:${port}/project/${projectId}/doc/${docId}`
+ const lines = [
+ 'smoke test - delete me',
+ `${crypto.randomBytes(32).toString('hex')}`,
+ ]
+ logger.debug({ lines, url, docId, projectId }, 'running health check')
+ let body
+ try {
+ await fetchNothing(url, {
+ method: 'POST',
+ json: { lines, version: 42, ranges: {} },
+ signal: AbortSignal.timeout(3_000),
})
- logger.debug({ lines, url, docId, projectId }, 'running health check')
- const jobs = [
- function (cb) {
- const opts = getOpts()
- opts.json = { lines, version: 42, ranges: {} }
- return request.post(opts, cb)
- },
- function (cb) {
- const opts = getOpts()
- opts.json = true
- return request.get(opts, function (err, res, body) {
- if (err != null) {
- logger.err({ err }, 'docstore returned a error in health check get')
- return cb(err)
- } else if (res == null) {
- return cb(new Error('no response from docstore with get check'))
- } else if ((res != null ? res.statusCode : undefined) !== 200) {
- return cb(new Error(`status code not 200, its ${res.statusCode}`))
- } else if (
- _.isEqual(body != null ? body.lines : undefined, lines) &&
- (body != null ? body._id : undefined) === docId.toString()
- ) {
- return cb()
- } else {
- return cb(
- new Error(
- `health check lines not equal ${body.lines} != ${lines}`
- )
- )
- }
- })
- },
- cb => db.docs.deleteOne({ _id: docId, project_id: projectId }, cb),
- ]
- return async.series(jobs, callback)
- },
+ body = await fetchJson(url, { signal: AbortSignal.timeout(3_000) })
+ } finally {
+ await db.docs.deleteOne({ _id: docId, project_id: projectId })
+ }
+ if (!_.isEqual(body?.lines, lines)) {
+ throw new Error(`health check lines not equal ${body.lines} != ${lines}`)
+ }
+}
+module.exports = {
+ check,
}
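
The rewritten check replaces request and async.series with @overleaf/fetch-utils plus AbortSignal timeouts. The same pattern in isolation (the URL and payload are placeholders):

const { fetchNothing, fetchJson } = require('@overleaf/fetch-utils')

async function postThenRead(url, payload) {
  // Write without reading a response body; give up after 3 seconds.
  await fetchNothing(url, {
    method: 'POST',
    json: payload,
    signal: AbortSignal.timeout(3_000),
  })
  // Read the document back as JSON with the same timeout.
  return await fetchJson(url, { signal: AbortSignal.timeout(3_000) })
}
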
diff --git a/services/docstore/app/js/HttpController.js b/services/docstore/app/js/HttpController.js
index 1c4e137033..50c4302aeb 100644
--- a/services/docstore/app/js/HttpController.js
+++ b/services/docstore/app/js/HttpController.js
@@ -4,143 +4,104 @@ const DocArchive = require('./DocArchiveManager')
const HealthChecker = require('./HealthChecker')
const Errors = require('./Errors')
const Settings = require('@overleaf/settings')
+const { expressify } = require('@overleaf/promise-utils')
-function getDoc(req, res, next) {
+async function getDoc(req, res) {
const { doc_id: docId, project_id: projectId } = req.params
const includeDeleted = req.query.include_deleted === 'true'
logger.debug({ projectId, docId }, 'getting doc')
- DocManager.getFullDoc(projectId, docId, function (error, doc) {
- if (error) {
- return next(error)
- }
- logger.debug({ docId, projectId }, 'got doc')
- if (doc == null) {
- res.sendStatus(404)
- } else if (doc.deleted && !includeDeleted) {
- res.sendStatus(404)
- } else {
- res.json(_buildDocView(doc))
- }
- })
+ const doc = await DocManager.getFullDoc(projectId, docId)
+ logger.debug({ docId, projectId }, 'got doc')
+ if (doc.deleted && !includeDeleted) {
+ res.sendStatus(404)
+ } else {
+ res.json(_buildDocView(doc))
+ }
}
-function peekDoc(req, res, next) {
+async function peekDoc(req, res) {
const { doc_id: docId, project_id: projectId } = req.params
logger.debug({ projectId, docId }, 'peeking doc')
- DocManager.peekDoc(projectId, docId, function (error, doc) {
- if (error) {
- return next(error)
- }
- if (doc == null) {
- res.sendStatus(404)
- } else {
- res.setHeader('x-doc-status', doc.inS3 ? 'archived' : 'active')
- res.json(_buildDocView(doc))
- }
- })
+ const doc = await DocManager.peekDoc(projectId, docId)
+ res.setHeader('x-doc-status', doc.inS3 ? 'archived' : 'active')
+ res.json(_buildDocView(doc))
}
-function isDocDeleted(req, res, next) {
+async function isDocDeleted(req, res) {
const { doc_id: docId, project_id: projectId } = req.params
- DocManager.isDocDeleted(projectId, docId, function (error, deleted) {
- if (error) {
- return next(error)
- }
- res.json({ deleted })
- })
+ const deleted = await DocManager.isDocDeleted(projectId, docId)
+ res.json({ deleted })
}
-function getRawDoc(req, res, next) {
+async function getRawDoc(req, res) {
const { doc_id: docId, project_id: projectId } = req.params
logger.debug({ projectId, docId }, 'getting raw doc')
- DocManager.getDocLines(projectId, docId, function (error, doc) {
- if (error) {
- return next(error)
- }
- if (doc == null) {
- res.sendStatus(404)
- } else {
- res.setHeader('content-type', 'text/plain')
- res.send(_buildRawDocView(doc))
- }
- })
+ const content = await DocManager.getDocLines(projectId, docId)
+ res.setHeader('content-type', 'text/plain')
+ res.send(content)
}
-function getAllDocs(req, res, next) {
+async function getAllDocs(req, res) {
const { project_id: projectId } = req.params
logger.debug({ projectId }, 'getting all docs')
- DocManager.getAllNonDeletedDocs(
- projectId,
- { lines: true, rev: true },
- function (error, docs) {
- if (docs == null) {
- docs = []
- }
- if (error) {
- return next(error)
- }
- const docViews = _buildDocsArrayView(projectId, docs)
- for (const docView of docViews) {
- if (!docView.lines) {
- logger.warn({ projectId, docId: docView._id }, 'missing doc lines')
- docView.lines = []
- }
- }
- res.json(docViews)
+ const docs = await DocManager.getAllNonDeletedDocs(projectId, {
+ lines: true,
+ rev: true,
+ })
+ const docViews = _buildDocsArrayView(projectId, docs)
+ for (const docView of docViews) {
+ if (!docView.lines) {
+ logger.warn({ projectId, docId: docView._id }, 'missing doc lines')
+ docView.lines = []
}
- )
+ }
+ res.json(docViews)
}
-function getAllDeletedDocs(req, res, next) {
+async function getAllDeletedDocs(req, res) {
const { project_id: projectId } = req.params
logger.debug({ projectId }, 'getting all deleted docs')
- DocManager.getAllDeletedDocs(
- projectId,
- { name: true, deletedAt: true },
- function (error, docs) {
- if (error) {
- return next(error)
- }
- res.json(
- docs.map(doc => ({
- _id: doc._id.toString(),
- name: doc.name,
- deletedAt: doc.deletedAt,
- }))
- )
- }
+ const docs = await DocManager.getAllDeletedDocs(projectId, {
+ name: true,
+ deletedAt: true,
+ })
+ res.json(
+ docs.map(doc => ({
+ _id: doc._id.toString(),
+ name: doc.name,
+ deletedAt: doc.deletedAt,
+ }))
)
}
-function getAllRanges(req, res, next) {
+async function getAllRanges(req, res) {
const { project_id: projectId } = req.params
logger.debug({ projectId }, 'getting all ranges')
- DocManager.getAllNonDeletedDocs(
- projectId,
- { ranges: true },
- function (error, docs) {
- if (docs == null) {
- docs = []
- }
- if (error) {
- return next(error)
- }
- res.json(_buildDocsArrayView(projectId, docs))
- }
- )
-}
-
-function projectHasRanges(req, res, next) {
- const { project_id: projectId } = req.params
- DocManager.projectHasRanges(projectId, (err, projectHasRanges) => {
- if (err) {
- return next(err)
- }
- res.json({ projectHasRanges })
+ const docs = await DocManager.getAllNonDeletedDocs(projectId, {
+ ranges: true,
})
+ res.json(_buildDocsArrayView(projectId, docs))
}
-function updateDoc(req, res, next) {
+async function getCommentThreadIds(req, res) {
+ const { project_id: projectId } = req.params
+ const threadIds = await DocManager.getCommentThreadIds(projectId)
+ res.json(threadIds)
+}
+
+async function getTrackedChangesUserIds(req, res) {
+ const { project_id: projectId } = req.params
+ const userIds = await DocManager.getTrackedChangesUserIds(projectId)
+ res.json(userIds)
+}
+
+async function projectHasRanges(req, res) {
+ const { project_id: projectId } = req.params
+ const projectHasRanges = await DocManager.projectHasRanges(projectId)
+ res.json({ projectHasRanges })
+}
+
+async function updateDoc(req, res) {
const { doc_id: docId, project_id: projectId } = req.params
const lines = req.body?.lines
const version = req.body?.version
@@ -172,25 +133,20 @@ function updateDoc(req, res, next) {
}
logger.debug({ projectId, docId }, 'got http request to update doc')
- DocManager.updateDoc(
+ const { modified, rev } = await DocManager.updateDoc(
projectId,
docId,
lines,
version,
- ranges,
- function (error, modified, rev) {
- if (error) {
- return next(error)
- }
- res.json({
- modified,
- rev,
- })
- }
+ ranges
)
+ res.json({
+ modified,
+ rev,
+ })
}
-function patchDoc(req, res, next) {
+async function patchDoc(req, res) {
const { doc_id: docId, project_id: projectId } = req.params
logger.debug({ projectId, docId }, 'patching doc')
@@ -203,12 +159,8 @@ function patchDoc(req, res, next) {
logger.fatal({ field }, 'joi validation for pathDoc is broken')
}
})
- DocManager.patchDoc(projectId, docId, meta, function (error) {
- if (error) {
- return next(error)
- }
- res.sendStatus(204)
- })
+ await DocManager.patchDoc(projectId, docId, meta)
+ res.sendStatus(204)
}
function _buildDocView(doc) {
@@ -221,10 +173,6 @@ function _buildDocView(doc) {
return docView
}
-function _buildRawDocView(doc) {
- return (doc?.lines ?? []).join('\n')
-}
-
function _buildDocsArrayView(projectId, docs) {
const docViews = []
for (const doc of docs) {
@@ -241,79 +189,69 @@ function _buildDocsArrayView(projectId, docs) {
return docViews
}
-function archiveAllDocs(req, res, next) {
+async function archiveAllDocs(req, res) {
const { project_id: projectId } = req.params
logger.debug({ projectId }, 'archiving all docs')
- DocArchive.archiveAllDocs(projectId, function (error) {
- if (error) {
- return next(error)
- }
- res.sendStatus(204)
- })
+ await DocArchive.archiveAllDocs(projectId)
+ res.sendStatus(204)
}
-function archiveDoc(req, res, next) {
+async function archiveDoc(req, res) {
const { doc_id: docId, project_id: projectId } = req.params
logger.debug({ projectId, docId }, 'archiving a doc')
- DocArchive.archiveDoc(projectId, docId, function (error) {
- if (error) {
- return next(error)
- }
- res.sendStatus(204)
- })
+ await DocArchive.archiveDoc(projectId, docId)
+ res.sendStatus(204)
}
-function unArchiveAllDocs(req, res, next) {
+async function unArchiveAllDocs(req, res) {
const { project_id: projectId } = req.params
logger.debug({ projectId }, 'unarchiving all docs')
- DocArchive.unArchiveAllDocs(projectId, function (err) {
- if (err) {
- if (err instanceof Errors.DocRevValueError) {
- logger.warn({ err }, 'Failed to unarchive doc')
- return res.sendStatus(409)
- }
- return next(err)
+ try {
+ await DocArchive.unArchiveAllDocs(projectId)
+ } catch (err) {
+ if (err instanceof Errors.DocRevValueError) {
+ logger.warn({ err }, 'Failed to unarchive doc')
+ return res.sendStatus(409)
}
- res.sendStatus(200)
- })
+ throw err
+ }
+ res.sendStatus(200)
}
-function destroyProject(req, res, next) {
+async function destroyProject(req, res) {
const { project_id: projectId } = req.params
logger.debug({ projectId }, 'destroying all docs')
- DocArchive.destroyProject(projectId, function (error) {
- if (error) {
- return next(error)
- }
- res.sendStatus(204)
- })
+ await DocArchive.destroyProject(projectId)
+ res.sendStatus(204)
}
-function healthCheck(req, res) {
- HealthChecker.check(function (err) {
- if (err) {
- logger.err({ err }, 'error performing health check')
- res.sendStatus(500)
- } else {
- res.sendStatus(200)
- }
- })
+async function healthCheck(req, res) {
+ try {
+ await HealthChecker.check()
+ } catch (err) {
+ logger.err({ err }, 'error performing health check')
+ res.sendStatus(500)
+ return
+ }
+ res.sendStatus(200)
}
module.exports = {
- getDoc,
- peekDoc,
- isDocDeleted,
- getRawDoc,
- getAllDocs,
- getAllDeletedDocs,
- getAllRanges,
- projectHasRanges,
- updateDoc,
- patchDoc,
- archiveAllDocs,
- archiveDoc,
- unArchiveAllDocs,
- destroyProject,
- healthCheck,
+ getDoc: expressify(getDoc),
+ peekDoc: expressify(peekDoc),
+ isDocDeleted: expressify(isDocDeleted),
+ getRawDoc: expressify(getRawDoc),
+ getAllDocs: expressify(getAllDocs),
+ getAllDeletedDocs: expressify(getAllDeletedDocs),
+ getAllRanges: expressify(getAllRanges),
+ getTrackedChangesUserIds: expressify(getTrackedChangesUserIds),
+ getCommentThreadIds: expressify(getCommentThreadIds),
+ projectHasRanges: expressify(projectHasRanges),
+ updateDoc: expressify(updateDoc),
+ patchDoc: expressify(patchDoc),
+ archiveAllDocs: expressify(archiveAllDocs),
+ archiveDoc: expressify(archiveDoc),
+ unArchiveAllDocs: expressify(unArchiveAllDocs),
+ destroyProject: expressify(destroyProject),
+ healthCheck: expressify(healthCheck),
}
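
expressify (from @overleaf/promise-utils) is what lets each handler above drop its callback plumbing: it wraps an async handler so that a rejected promise is forwarded to next() and handled by the error middleware in app.js. A conceptual sketch of that wrapper, not the library's actual source:

// Illustrative only; see @overleaf/promise-utils for the real implementation.
function expressifySketch(handler) {
  return (req, res, next) => {
    // Forward both sync throws and async rejections to Express error handling.
    Promise.resolve(handler(req, res, next)).catch(next)
  }
}

// Usage mirrors the exports above, e.g.:
// app.get('/project/:project_id/ranges', expressifySketch(getAllRanges))
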
diff --git a/services/docstore/app/js/MongoManager.js b/services/docstore/app/js/MongoManager.js
index ad1a2d2b40..ef101f91c0 100644
--- a/services/docstore/app/js/MongoManager.js
+++ b/services/docstore/app/js/MongoManager.js
@@ -1,7 +1,6 @@
const { db, ObjectId } = require('./mongodb')
const Settings = require('@overleaf/settings')
const Errors = require('./Errors')
-const { callbackify } = require('node:util')
const ARCHIVING_LOCK_DURATION_MS = Settings.archivingLockDurationMs
@@ -241,34 +240,17 @@ async function destroyProject(projectId) {
}
module.exports = {
- findDoc: callbackify(findDoc),
- getProjectsDeletedDocs: callbackify(getProjectsDeletedDocs),
- getProjectsDocs: callbackify(getProjectsDocs),
- getArchivedProjectDocs: callbackify(getArchivedProjectDocs),
- getNonArchivedProjectDocIds: callbackify(getNonArchivedProjectDocIds),
- getNonDeletedArchivedProjectDocs: callbackify(
- getNonDeletedArchivedProjectDocs
- ),
- upsertIntoDocCollection: callbackify(upsertIntoDocCollection),
- restoreArchivedDoc: callbackify(restoreArchivedDoc),
- patchDoc: callbackify(patchDoc),
- getDocForArchiving: callbackify(getDocForArchiving),
- markDocAsArchived: callbackify(markDocAsArchived),
- checkRevUnchanged: callbackify(checkRevUnchanged),
- destroyProject: callbackify(destroyProject),
- promises: {
- findDoc,
- getProjectsDeletedDocs,
- getProjectsDocs,
- getArchivedProjectDocs,
- getNonArchivedProjectDocIds,
- getNonDeletedArchivedProjectDocs,
- upsertIntoDocCollection,
- restoreArchivedDoc,
- patchDoc,
- getDocForArchiving,
- markDocAsArchived,
- checkRevUnchanged,
- destroyProject,
- },
+ findDoc,
+ getProjectsDeletedDocs,
+ getProjectsDocs,
+ getArchivedProjectDocs,
+ getNonArchivedProjectDocIds,
+ getNonDeletedArchivedProjectDocs,
+ upsertIntoDocCollection,
+ restoreArchivedDoc,
+ patchDoc,
+ getDocForArchiving,
+ markDocAsArchived,
+ checkRevUnchanged,
+ destroyProject,
}
diff --git a/services/docstore/app/js/RangeManager.js b/services/docstore/app/js/RangeManager.js
index f36f68fe35..2fbadf9468 100644
--- a/services/docstore/app/js/RangeManager.js
+++ b/services/docstore/app/js/RangeManager.js
@@ -49,15 +49,25 @@ module.exports = RangeManager = {
updateMetadata(change.metadata)
}
for (const comment of Array.from(ranges.comments || [])) {
- comment.id = RangeManager._safeObjectId(comment.id)
- if ((comment.op != null ? comment.op.t : undefined) != null) {
- comment.op.t = RangeManager._safeObjectId(comment.op.t)
- }
+      // Two bugs resulted in mismatched ids; prefer the thread id from the op: https://github.com/overleaf/internal/issues/23272
+ comment.id = RangeManager._safeObjectId(comment.op?.t || comment.id)
+ if (comment.op) comment.op.t = comment.id
+
+      // The resolved property is added to comments when they are obtained from history, but this state doesn't belong in the mongo docs collection.
+      // More info: https://github.com/overleaf/internal/issues/24371#issuecomment-2913095174
+ delete comment.op?.resolved
updateMetadata(comment.metadata)
}
return ranges
},
+ fixCommentIds(doc) {
+ for (const comment of doc?.ranges?.comments || []) {
+      // Two bugs resulted in mismatched ids; prefer the thread id from the op: https://github.com/overleaf/internal/issues/23272
+ if (comment.op?.t) comment.id = comment.op.t
+ }
+ },
+
_safeObjectId(data) {
try {
return new ObjectId(data)
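
fixCommentIds rewrites each comment id in place so it agrees with the thread id carried in the op, without touching anything else on the doc. A small illustration with made-up data:

const RangeManager = require('./RangeManager')

const doc = {
  ranges: {
    comments: [
      // id and op.t disagree because of the historical bugs referenced above
      {
        id: 'aaaaaaaaaaaaaaaaaaaaaaaa',
        op: { c: 'hi', p: 0, t: 'bbbbbbbbbbbbbbbbbbbbbbbb' },
      },
    ],
  },
}

RangeManager.fixCommentIds(doc)
// doc.ranges.comments[0].id === 'bbbbbbbbbbbbbbbbbbbbbbbb'
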
diff --git a/services/docstore/app/js/StreamToBuffer.js b/services/docstore/app/js/StreamToBuffer.js
new file mode 100644
index 0000000000..09215a7367
--- /dev/null
+++ b/services/docstore/app/js/StreamToBuffer.js
@@ -0,0 +1,24 @@
+const { LoggerStream, WritableBuffer } = require('@overleaf/stream-utils')
+const Settings = require('@overleaf/settings')
+const logger = require('@overleaf/logger/logging-manager')
+const { pipeline } = require('node:stream/promises')
+
+module.exports = {
+ streamToBuffer,
+}
+
+async function streamToBuffer(projectId, docId, stream) {
+ const loggerTransform = new LoggerStream(
+ Settings.max_doc_length,
+ (size, isFlush) => {
+ logger.warn(
+ { projectId, docId, size, finishedReading: isFlush },
+ 'potentially large doc pulled down from gcs'
+ )
+ }
+ )
+
+ const buffer = new WritableBuffer()
+ await pipeline(stream, loggerTransform, buffer)
+ return buffer.contents()
+}
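
streamToBuffer is a drop-in replacement for the private _streamToBuffer helper removed from DocArchiveManager; it accepts any readable stream and keeps the warning for content that exceeds Settings.max_doc_length. A usage sketch:

const { Readable } = require('node:stream')
const { streamToBuffer } = require('./StreamToBuffer')

async function demo() {
  const stream = Readable.from([Buffer.from('hello '), Buffer.from('world')])
  const buffer = await streamToBuffer('projectId', 'docId', stream)
  return buffer.toString() // 'hello world'
}
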
diff --git a/services/docstore/buildscript.txt b/services/docstore/buildscript.txt
index dbce00521b..4526aa959c 100644
--- a/services/docstore/buildscript.txt
+++ b/services/docstore/buildscript.txt
@@ -4,6 +4,6 @@ docstore
--env-add=
--env-pass-through=
--esmock-loader=False
---node-version=20.18.0
+--node-version=22.17.0
--public-repo=True
---script-version=4.5.0
+--script-version=4.7.0
diff --git a/services/docstore/docker-compose.ci.yml b/services/docstore/docker-compose.ci.yml
index a8847e8996..cdb4783c5a 100644
--- a/services/docstore/docker-compose.ci.yml
+++ b/services/docstore/docker-compose.ci.yml
@@ -27,12 +27,15 @@ services:
MOCHA_GREP: ${MOCHA_GREP}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
+ volumes:
+ - ../../bin/shared/wait_for_it:/overleaf/bin/shared/wait_for_it
depends_on:
mongo:
- condition: service_healthy
+ condition: service_started
gcs:
condition: service_healthy
user: node
+ entrypoint: /overleaf/bin/shared/wait_for_it mongo:27017 --timeout=0 --
command: npm run test:acceptance
@@ -44,12 +47,17 @@ services:
command: tar -czf /tmp/build/build.tar.gz --exclude=build.tar.gz --exclude-vcs .
user: root
mongo:
- image: mongo:6.0.13
+ image: mongo:8.0.11
command: --replSet overleaf
- healthcheck:
- test: "mongosh --quiet localhost/test --eval 'quit(db.runCommand({ ping: 1 }).ok ? 0 : 1)'"
- interval: 1s
- retries: 20
+ volumes:
+ - ../../bin/shared/mongodb-init-replica-set.js:/docker-entrypoint-initdb.d/mongodb-init-replica-set.js
+ environment:
+ MONGO_INITDB_DATABASE: sharelatex
+ extra_hosts:
+ # Required when using the automatic database setup for initializing the
+ # replica set. This override is not needed when running the setup after
+ # starting up mongo.
+ - mongo:127.0.0.1
gcs:
image: fsouza/fake-gcs-server:1.45.2
command: ["--port=9090", "--scheme=http"]
diff --git a/services/docstore/docker-compose.yml b/services/docstore/docker-compose.yml
index f6e4a67c9c..a9099c7e7b 100644
--- a/services/docstore/docker-compose.yml
+++ b/services/docstore/docker-compose.yml
@@ -6,7 +6,7 @@ version: "2.3"
services:
test_unit:
- image: node:20.18.0
+ image: node:22.17.0
volumes:
- .:/overleaf/services/docstore
- ../../node_modules:/overleaf/node_modules
@@ -14,17 +14,19 @@ services:
working_dir: /overleaf/services/docstore
environment:
MOCHA_GREP: ${MOCHA_GREP}
+ LOG_LEVEL: ${LOG_LEVEL:-}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
command: npm run --silent test:unit
user: node
test_acceptance:
- image: node:20.18.0
+ image: node:22.17.0
volumes:
- .:/overleaf/services/docstore
- ../../node_modules:/overleaf/node_modules
- ../../libraries:/overleaf/libraries
+ - ../../bin/shared/wait_for_it:/overleaf/bin/shared/wait_for_it
working_dir: /overleaf/services/docstore
environment:
ELASTIC_SEARCH_DSN: es:9200
@@ -34,24 +36,30 @@ services:
GCS_PROJECT_ID: fake
STORAGE_EMULATOR_HOST: http://gcs:9090/storage/v1
MOCHA_GREP: ${MOCHA_GREP}
- LOG_LEVEL: ERROR
+ LOG_LEVEL: ${LOG_LEVEL:-}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
user: node
depends_on:
mongo:
- condition: service_healthy
+ condition: service_started
gcs:
condition: service_healthy
+ entrypoint: /overleaf/bin/shared/wait_for_it mongo:27017 --timeout=0 --
command: npm run --silent test:acceptance
mongo:
- image: mongo:6.0.13
+ image: mongo:8.0.11
command: --replSet overleaf
- healthcheck:
- test: "mongosh --quiet localhost/test --eval 'quit(db.runCommand({ ping: 1 }).ok ? 0 : 1)'"
- interval: 1s
- retries: 20
+ volumes:
+ - ../../bin/shared/mongodb-init-replica-set.js:/docker-entrypoint-initdb.d/mongodb-init-replica-set.js
+ environment:
+ MONGO_INITDB_DATABASE: sharelatex
+ extra_hosts:
+ # Required when using the automatic database setup for initializing the
+ # replica set. This override is not needed when running the setup after
+ # starting up mongo.
+ - mongo:127.0.0.1
gcs:
image: fsouza/fake-gcs-server:1.45.2
diff --git a/services/docstore/package.json b/services/docstore/package.json
index 2e478ca840..bf5857fd49 100644
--- a/services/docstore/package.json
+++ b/services/docstore/package.json
@@ -17,6 +17,7 @@
"types:check": "tsc --noEmit"
},
"dependencies": {
+ "@overleaf/fetch-utils": "*",
"@overleaf/logger": "*",
"@overleaf/metrics": "*",
"@overleaf/o-error": "*",
@@ -28,7 +29,7 @@
"body-parser": "^1.20.3",
"bunyan": "^1.8.15",
"celebrate": "^15.0.3",
- "express": "^4.21.0",
+ "express": "^4.21.2",
"lodash": "^4.17.21",
"mongodb-legacy": "6.1.3",
"p-map": "^4.0.0",
@@ -38,7 +39,7 @@
"@google-cloud/storage": "^6.10.1",
"chai": "^4.3.6",
"chai-as-promised": "^7.1.1",
- "mocha": "^10.2.0",
+ "mocha": "^11.1.0",
"sandboxed-module": "~2.0.4",
"sinon": "~9.0.2",
"sinon-chai": "^3.7.0",
diff --git a/services/docstore/test/acceptance/deps/healthcheck.sh b/services/docstore/test/acceptance/deps/healthcheck.sh
index cd19cea637..675c205be6 100644
--- a/services/docstore/test/acceptance/deps/healthcheck.sh
+++ b/services/docstore/test/acceptance/deps/healthcheck.sh
@@ -1,9 +1,9 @@
#!/bin/sh
# health check to allow 404 status code as valid
-STATUSCODE=$(curl --silent --output /dev/null --write-out "%{http_code}" $1)
+STATUSCODE=$(curl --silent --output /dev/null --write-out "%{http_code}" "$1")
# will be 000 on non-http error (e.g. connection failure)
-if test $STATUSCODE -ge 500 || test $STATUSCODE -lt 200; then
+if test "$STATUSCODE" -ge 500 || test "$STATUSCODE" -lt 200; then
exit 1
fi
exit 0
diff --git a/services/docstore/test/acceptance/js/ArchiveDocsTests.js b/services/docstore/test/acceptance/js/ArchiveDocsTests.js
index d9228103b6..7e254c7e84 100644
--- a/services/docstore/test/acceptance/js/ArchiveDocsTests.js
+++ b/services/docstore/test/acceptance/js/ArchiveDocsTests.js
@@ -1001,6 +1001,15 @@ describe('Archiving', function () {
},
version: 2,
}
+ this.fixedRanges = {
+ ...this.doc.ranges,
+ comments: [
+ {
+ ...this.doc.ranges.comments[0],
+ id: this.doc.ranges.comments[0].op.t,
+ },
+ ],
+ }
return DocstoreClient.createDoc(
this.project_id,
this.doc._id,
@@ -1048,7 +1057,7 @@ describe('Archiving', function () {
throw error
}
s3Doc.lines.should.deep.equal(this.doc.lines)
- const ranges = JSON.parse(JSON.stringify(this.doc.ranges)) // ObjectId -> String
+ const ranges = JSON.parse(JSON.stringify(this.fixedRanges)) // ObjectId -> String
s3Doc.ranges.should.deep.equal(ranges)
return done()
}
@@ -1075,7 +1084,7 @@ describe('Archiving', function () {
throw error
}
doc.lines.should.deep.equal(this.doc.lines)
- doc.ranges.should.deep.equal(this.doc.ranges)
+ doc.ranges.should.deep.equal(this.fixedRanges)
expect(doc.inS3).not.to.exist
return done()
})
diff --git a/services/docstore/test/acceptance/js/GettingAllDocsTests.js b/services/docstore/test/acceptance/js/GettingAllDocsTests.js
index 8fe5e7d91b..57851b2c3b 100644
--- a/services/docstore/test/acceptance/js/GettingAllDocsTests.js
+++ b/services/docstore/test/acceptance/js/GettingAllDocsTests.js
@@ -20,30 +20,73 @@ const DocstoreClient = require('./helpers/DocstoreClient')
describe('Getting all docs', function () {
beforeEach(function (done) {
this.project_id = new ObjectId()
+ this.threadId1 = new ObjectId().toString()
+ this.threadId2 = new ObjectId().toString()
this.docs = [
{
_id: new ObjectId(),
lines: ['one', 'two', 'three'],
- ranges: { mock: 'one' },
+ ranges: {
+ comments: [
+ { id: new ObjectId().toString(), op: { t: this.threadId1 } },
+ ],
+ changes: [
+ {
+ id: new ObjectId().toString(),
+ metadata: { user_id: 'user-id-1' },
+ },
+ ],
+ },
rev: 2,
},
{
_id: new ObjectId(),
lines: ['aaa', 'bbb', 'ccc'],
- ranges: { mock: 'two' },
+ ranges: {
+ changes: [
+ {
+ id: new ObjectId().toString(),
+ metadata: { user_id: 'user-id-2' },
+ },
+ ],
+ },
rev: 4,
},
{
_id: new ObjectId(),
lines: ['111', '222', '333'],
- ranges: { mock: 'three' },
+ ranges: {
+ comments: [
+ { id: new ObjectId().toString(), op: { t: this.threadId2 } },
+ ],
+ changes: [
+ {
+ id: new ObjectId().toString(),
+ metadata: { user_id: 'anonymous-user' },
+ },
+ ],
+ },
rev: 6,
},
]
+ this.fixedRanges = this.docs.map(doc => {
+ if (!doc.ranges?.comments?.length) return doc.ranges
+ return {
+ ...doc.ranges,
+ comments: [
+ { ...doc.ranges.comments[0], id: doc.ranges.comments[0].op.t },
+ ],
+ }
+ })
this.deleted_doc = {
_id: new ObjectId(),
lines: ['deleted'],
- ranges: { mock: 'four' },
+ ranges: {
+ comments: [{ id: new ObjectId().toString(), op: { t: 'thread-id-3' } }],
+ changes: [
+ { id: new ObjectId().toString(), metadata: { user_id: 'user-id-3' } },
+ ],
+ },
rev: 8,
}
const version = 42
@@ -96,7 +139,7 @@ describe('Getting all docs', function () {
})
})
- return it('getAllRanges should return all the (non-deleted) doc ranges', function (done) {
+ it('getAllRanges should return all the (non-deleted) doc ranges', function (done) {
return DocstoreClient.getAllRanges(this.project_id, (error, res, docs) => {
if (error != null) {
throw error
@@ -104,9 +147,38 @@ describe('Getting all docs', function () {
docs.length.should.equal(this.docs.length)
for (let i = 0; i < docs.length; i++) {
const doc = docs[i]
- doc.ranges.should.deep.equal(this.docs[i].ranges)
+ doc.ranges.should.deep.equal(this.fixedRanges[i])
}
return done()
})
})
+
+ it('getTrackedChangesUserIds should return all the user ids from (non-deleted) ranges', function (done) {
+ DocstoreClient.getTrackedChangesUserIds(
+ this.project_id,
+ (error, res, userIds) => {
+ if (error != null) {
+ throw error
+ }
+ userIds.should.deep.equal(['user-id-1', 'user-id-2'])
+ done()
+ }
+ )
+ })
+
+ it('getCommentThreadIds should return all the thread ids from (non-deleted) ranges', function (done) {
+ DocstoreClient.getCommentThreadIds(
+ this.project_id,
+ (error, res, threadIds) => {
+ if (error != null) {
+ throw error
+ }
+ threadIds.should.deep.equal({
+ [this.docs[0]._id.toString()]: [this.threadId1],
+ [this.docs[2]._id.toString()]: [this.threadId2],
+ })
+ done()
+ }
+ )
+ })
})
diff --git a/services/docstore/test/acceptance/js/GettingDocsTests.js b/services/docstore/test/acceptance/js/GettingDocsTests.js
index 121b3c1e24..1cfc53c5c6 100644
--- a/services/docstore/test/acceptance/js/GettingDocsTests.js
+++ b/services/docstore/test/acceptance/js/GettingDocsTests.js
@@ -28,10 +28,26 @@ describe('Getting a doc', function () {
op: { i: 'foo', p: 3 },
meta: {
user_id: new ObjectId().toString(),
- ts: new Date().toString(),
+ ts: new Date().toJSON(),
},
},
],
+ comments: [
+ {
+ id: new ObjectId().toString(),
+ op: { c: 'comment', p: 1, t: new ObjectId().toString() },
+ metadata: {
+ user_id: new ObjectId().toString(),
+ ts: new Date().toJSON(),
+ },
+ },
+ ],
+ }
+ this.fixedRanges = {
+ ...this.ranges,
+ comments: [
+ { ...this.ranges.comments[0], id: this.ranges.comments[0].op.t },
+ ],
}
return DocstoreApp.ensureRunning(() => {
return DocstoreClient.createDoc(
@@ -60,7 +76,7 @@ describe('Getting a doc', function () {
if (error) return done(error)
doc.lines.should.deep.equal(this.lines)
doc.version.should.equal(this.version)
- doc.ranges.should.deep.equal(this.ranges)
+ doc.ranges.should.deep.equal(this.fixedRanges)
return done()
}
)
@@ -114,7 +130,7 @@ describe('Getting a doc', function () {
if (error) return done(error)
doc.lines.should.deep.equal(this.lines)
doc.version.should.equal(this.version)
- doc.ranges.should.deep.equal(this.ranges)
+ doc.ranges.should.deep.equal(this.fixedRanges)
doc.deleted.should.equal(true)
return done()
}
diff --git a/services/docstore/test/acceptance/js/HealthCheckerTest.js b/services/docstore/test/acceptance/js/HealthCheckerTest.js
new file mode 100644
index 0000000000..b25a45312b
--- /dev/null
+++ b/services/docstore/test/acceptance/js/HealthCheckerTest.js
@@ -0,0 +1,28 @@
+const { db } = require('../../../app/js/mongodb')
+const DocstoreApp = require('./helpers/DocstoreApp')
+const DocstoreClient = require('./helpers/DocstoreClient')
+const { expect } = require('chai')
+
+describe('HealthChecker', function () {
+ beforeEach('start', function (done) {
+ DocstoreApp.ensureRunning(done)
+ })
+ beforeEach('clear docs collection', async function () {
+ await db.docs.deleteMany({})
+ })
+ let res
+ beforeEach('run health check', function (done) {
+ DocstoreClient.healthCheck((err, _res) => {
+ res = _res
+ done(err)
+ })
+ })
+
+ it('should return 200', function () {
+ res.statusCode.should.equal(200)
+ })
+
+ it('should not leave any cruft behind', async function () {
+ expect(await db.docs.find({}).toArray()).to.deep.equal([])
+ })
+})
diff --git a/services/docstore/test/acceptance/js/helpers/DocstoreApp.js b/services/docstore/test/acceptance/js/helpers/DocstoreApp.js
index 03db0ea322..5e837b1277 100644
--- a/services/docstore/test/acceptance/js/helpers/DocstoreApp.js
+++ b/services/docstore/test/acceptance/js/helpers/DocstoreApp.js
@@ -1,5 +1,4 @@
const app = require('../../../../app')
-require('@overleaf/logger').logger.level('error')
const settings = require('@overleaf/settings')
module.exports = {
diff --git a/services/docstore/test/acceptance/js/helpers/DocstoreClient.js b/services/docstore/test/acceptance/js/helpers/DocstoreClient.js
index 790ec8f237..cb8bce2579 100644
--- a/services/docstore/test/acceptance/js/helpers/DocstoreClient.js
+++ b/services/docstore/test/acceptance/js/helpers/DocstoreClient.js
@@ -100,6 +100,26 @@ module.exports = DocstoreClient = {
)
},
+ getCommentThreadIds(projectId, callback) {
+ request.get(
+ {
+ url: `http://127.0.0.1:${settings.internal.docstore.port}/project/${projectId}/comment-thread-ids`,
+ json: true,
+ },
+ callback
+ )
+ },
+
+ getTrackedChangesUserIds(projectId, callback) {
+ request.get(
+ {
+ url: `http://127.0.0.1:${settings.internal.docstore.port}/project/${projectId}/tracked-changes-user-ids`,
+ json: true,
+ },
+ callback
+ )
+ },
+
updateDoc(projectId, docId, lines, version, ranges, callback) {
return request.post(
{
@@ -181,6 +201,13 @@ module.exports = DocstoreClient = {
)
},
+ healthCheck(callback) {
+ request.get(
+ `http://127.0.0.1:${settings.internal.docstore.port}/health_check`,
+ callback
+ )
+ },
+
getS3Doc(projectId, docId, callback) {
getStringFromPersistor(
Persistor,
diff --git a/services/docstore/test/unit/js/DocArchiveManagerTests.js b/services/docstore/test/unit/js/DocArchiveManagerTests.js
index 13046d86fc..2ec1cb2016 100644
--- a/services/docstore/test/unit/js/DocArchiveManagerTests.js
+++ b/services/docstore/test/unit/js/DocArchiveManagerTests.js
@@ -4,6 +4,7 @@ const modulePath = '../../../app/js/DocArchiveManager.js'
const SandboxedModule = require('sandboxed-module')
const { ObjectId } = require('mongodb-legacy')
const Errors = require('../../../app/js/Errors')
+const StreamToBuffer = require('../../../app/js/StreamToBuffer')
describe('DocArchiveManager', function () {
let DocArchiveManager,
@@ -22,13 +23,15 @@ describe('DocArchiveManager', function () {
md5Sum,
projectId,
readStream,
- stream
+ stream,
+ streamToBuffer
beforeEach(function () {
md5Sum = 'decafbad'
RangeManager = {
jsonRangesToMongo: sinon.stub().returns({ mongo: 'ranges' }),
+ fixCommentIds: sinon.stub(),
}
Settings = {
docstore: {
@@ -140,17 +143,33 @@ describe('DocArchiveManager', function () {
}
MongoManager = {
- promises: {
- markDocAsArchived: sinon.stub().resolves(),
- restoreArchivedDoc: sinon.stub().resolves(),
- upsertIntoDocCollection: sinon.stub().resolves(),
- getProjectsDocs: sinon.stub().resolves(mongoDocs),
- getNonDeletedArchivedProjectDocs: getArchivedProjectDocs,
- getNonArchivedProjectDocIds,
- getArchivedProjectDocs,
- findDoc: sinon.stub().callsFake(fakeGetDoc),
- getDocForArchiving: sinon.stub().callsFake(fakeGetDoc),
- destroyProject: sinon.stub().resolves(),
+ markDocAsArchived: sinon.stub().resolves(),
+ restoreArchivedDoc: sinon.stub().resolves(),
+ upsertIntoDocCollection: sinon.stub().resolves(),
+ getProjectsDocs: sinon.stub().resolves(mongoDocs),
+ getNonDeletedArchivedProjectDocs: getArchivedProjectDocs,
+ getNonArchivedProjectDocIds,
+ getArchivedProjectDocs,
+ findDoc: sinon.stub().callsFake(fakeGetDoc),
+ getDocForArchiving: sinon.stub().callsFake(fakeGetDoc),
+ destroyProject: sinon.stub().resolves(),
+ }
+
+ // Wrap streamToBuffer so that we can pass in something that it expects (in
+ // this case, a Promise) rather than a stubbed stream object
+ streamToBuffer = {
+ streamToBuffer: async () => {
+ const inputStream = new Promise(resolve => {
+ stream.on('data', data => resolve(data))
+ })
+
+ const value = await StreamToBuffer.streamToBuffer(
+ 'testProjectId',
+ 'testDocId',
+ inputStream
+ )
+
+ return value
},
}
@@ -163,15 +182,20 @@ describe('DocArchiveManager', function () {
'./RangeManager': RangeManager,
'./PersistorManager': PersistorManager,
'./Errors': Errors,
+ './StreamToBuffer': streamToBuffer,
},
})
})
describe('archiveDoc', function () {
it('should resolve when passed a valid document', async function () {
- await expect(
- DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[0]._id)
- ).to.eventually.be.fulfilled
+ await expect(DocArchiveManager.archiveDoc(projectId, mongoDocs[0]._id)).to
+ .eventually.be.fulfilled
+ })
+
+ it('should fix comment ids', async function () {
+ await DocArchiveManager.archiveDoc(projectId, mongoDocs[1]._id)
+ expect(RangeManager.fixCommentIds).to.have.been.called
})
it('should throw an error if the doc has no lines', async function () {
@@ -179,26 +203,26 @@ describe('DocArchiveManager', function () {
doc.lines = null
await expect(
- DocArchiveManager.promises.archiveDoc(projectId, doc._id)
+ DocArchiveManager.archiveDoc(projectId, doc._id)
).to.eventually.be.rejectedWith('doc has no lines')
})
it('should add the schema version', async function () {
- await DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[1]._id)
+ await DocArchiveManager.archiveDoc(projectId, mongoDocs[1]._id)
expect(StreamUtils.ReadableString).to.have.been.calledWith(
sinon.match(/"schema_v":1/)
)
})
it('should calculate the hex md5 sum of the content', async function () {
- await DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[0]._id)
+ await DocArchiveManager.archiveDoc(projectId, mongoDocs[0]._id)
expect(Crypto.createHash).to.have.been.calledWith('md5')
expect(HashUpdate).to.have.been.calledWith(archivedDocJson)
expect(HashDigest).to.have.been.calledWith('hex')
})
it('should pass the md5 hash to the object persistor for verification', async function () {
- await DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[0]._id)
+ await DocArchiveManager.archiveDoc(projectId, mongoDocs[0]._id)
expect(PersistorManager.sendStream).to.have.been.calledWith(
sinon.match.any,
@@ -209,7 +233,7 @@ describe('DocArchiveManager', function () {
})
it('should pass the correct bucket and key to the persistor', async function () {
- await DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[0]._id)
+ await DocArchiveManager.archiveDoc(projectId, mongoDocs[0]._id)
expect(PersistorManager.sendStream).to.have.been.calledWith(
Settings.docstore.bucket,
@@ -218,7 +242,7 @@ describe('DocArchiveManager', function () {
})
it('should create a stream from the encoded json and send it', async function () {
- await DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[0]._id)
+ await DocArchiveManager.archiveDoc(projectId, mongoDocs[0]._id)
expect(StreamUtils.ReadableString).to.have.been.calledWith(
archivedDocJson
)
@@ -230,8 +254,8 @@ describe('DocArchiveManager', function () {
})
it('should mark the doc as archived', async function () {
- await DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[0]._id)
- expect(MongoManager.promises.markDocAsArchived).to.have.been.calledWith(
+ await DocArchiveManager.archiveDoc(projectId, mongoDocs[0]._id)
+ expect(MongoManager.markDocAsArchived).to.have.been.calledWith(
projectId,
mongoDocs[0]._id,
mongoDocs[0].rev
@@ -244,8 +268,8 @@ describe('DocArchiveManager', function () {
})
it('should bail out early', async function () {
- await DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[0]._id)
- expect(MongoManager.promises.getDocForArchiving).to.not.have.been.called
+ await DocArchiveManager.archiveDoc(projectId, mongoDocs[0]._id)
+ expect(MongoManager.getDocForArchiving).to.not.have.been.called
})
})
@@ -262,7 +286,7 @@ describe('DocArchiveManager', function () {
it('should return an error', async function () {
await expect(
- DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[0]._id)
+ DocArchiveManager.archiveDoc(projectId, mongoDocs[0]._id)
).to.eventually.be.rejectedWith('null bytes detected')
})
})
@@ -273,21 +297,19 @@ describe('DocArchiveManager', function () {
describe('when the doc is in S3', function () {
beforeEach(function () {
- MongoManager.promises.findDoc = sinon
- .stub()
- .resolves({ inS3: true, rev })
+ MongoManager.findDoc = sinon.stub().resolves({ inS3: true, rev })
docId = mongoDocs[0]._id
lines = ['doc', 'lines']
rev = 123
})
it('should resolve when passed a valid document', async function () {
- await expect(DocArchiveManager.promises.unarchiveDoc(projectId, docId))
- .to.eventually.be.fulfilled
+ await expect(DocArchiveManager.unarchiveDoc(projectId, docId)).to
+ .eventually.be.fulfilled
})
it('should test md5 validity with the raw buffer', async function () {
- await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
+ await DocArchiveManager.unarchiveDoc(projectId, docId)
expect(HashUpdate).to.have.been.calledWith(
sinon.match.instanceOf(Buffer)
)
@@ -296,15 +318,17 @@ describe('DocArchiveManager', function () {
it('should throw an error if the md5 does not match', async function () {
PersistorManager.getObjectMd5Hash.resolves('badf00d')
await expect(
- DocArchiveManager.promises.unarchiveDoc(projectId, docId)
+ DocArchiveManager.unarchiveDoc(projectId, docId)
).to.eventually.be.rejected.and.be.instanceof(Errors.Md5MismatchError)
})
it('should restore the doc in Mongo', async function () {
- await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
- expect(
- MongoManager.promises.restoreArchivedDoc
- ).to.have.been.calledWith(projectId, docId, archivedDoc)
+ await DocArchiveManager.unarchiveDoc(projectId, docId)
+ expect(MongoManager.restoreArchivedDoc).to.have.been.calledWith(
+ projectId,
+ docId,
+ archivedDoc
+ )
})
describe('when archiving is not configured', function () {
@@ -314,15 +338,15 @@ describe('DocArchiveManager', function () {
it('should error out on archived doc', async function () {
await expect(
- DocArchiveManager.promises.unarchiveDoc(projectId, docId)
+ DocArchiveManager.unarchiveDoc(projectId, docId)
).to.eventually.be.rejected.and.match(
/found archived doc, but archiving backend is not configured/
)
})
it('should return early on non-archived doc', async function () {
- MongoManager.promises.findDoc = sinon.stub().resolves({ rev })
- await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
+ MongoManager.findDoc = sinon.stub().resolves({ rev })
+ await DocArchiveManager.unarchiveDoc(projectId, docId)
expect(PersistorManager.getObjectMd5Hash).to.not.have.been.called
})
})
@@ -340,10 +364,12 @@ describe('DocArchiveManager', function () {
})
it('should return the docs lines', async function () {
- await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
- expect(
- MongoManager.promises.restoreArchivedDoc
- ).to.have.been.calledWith(projectId, docId, { lines, rev })
+ await DocArchiveManager.unarchiveDoc(projectId, docId)
+ expect(MongoManager.restoreArchivedDoc).to.have.been.calledWith(
+ projectId,
+ docId,
+ { lines, rev }
+ )
})
})
@@ -362,14 +388,16 @@ describe('DocArchiveManager', function () {
})
it('should return the doc lines and ranges', async function () {
- await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
- expect(
- MongoManager.promises.restoreArchivedDoc
- ).to.have.been.calledWith(projectId, docId, {
- lines,
- ranges: { mongo: 'ranges' },
- rev: 456,
- })
+ await DocArchiveManager.unarchiveDoc(projectId, docId)
+ expect(MongoManager.restoreArchivedDoc).to.have.been.calledWith(
+ projectId,
+ docId,
+ {
+ lines,
+ ranges: { mongo: 'ranges' },
+ rev: 456,
+ }
+ )
})
})
@@ -383,10 +411,12 @@ describe('DocArchiveManager', function () {
})
it('should return only the doc lines', async function () {
- await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
- expect(
- MongoManager.promises.restoreArchivedDoc
- ).to.have.been.calledWith(projectId, docId, { lines, rev: 456 })
+ await DocArchiveManager.unarchiveDoc(projectId, docId)
+ expect(MongoManager.restoreArchivedDoc).to.have.been.calledWith(
+ projectId,
+ docId,
+ { lines, rev: 456 }
+ )
})
})
@@ -400,10 +430,12 @@ describe('DocArchiveManager', function () {
})
it('should use the rev obtained from Mongo', async function () {
- await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
- expect(
- MongoManager.promises.restoreArchivedDoc
- ).to.have.been.calledWith(projectId, docId, { lines, rev })
+ await DocArchiveManager.unarchiveDoc(projectId, docId)
+ expect(MongoManager.restoreArchivedDoc).to.have.been.calledWith(
+ projectId,
+ docId,
+ { lines, rev }
+ )
})
})
@@ -418,7 +450,7 @@ describe('DocArchiveManager', function () {
it('should throw an error', async function () {
await expect(
- DocArchiveManager.promises.unarchiveDoc(projectId, docId)
+ DocArchiveManager.unarchiveDoc(projectId, docId)
).to.eventually.be.rejectedWith(
"I don't understand the doc format in s3"
)
@@ -428,8 +460,8 @@ describe('DocArchiveManager', function () {
})
it('should not do anything if the file is already unarchived', async function () {
- MongoManager.promises.findDoc.resolves({ inS3: false })
- await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
+ MongoManager.findDoc.resolves({ inS3: false })
+ await DocArchiveManager.unarchiveDoc(projectId, docId)
expect(PersistorManager.getObjectStream).not.to.have.been.called
})
@@ -438,7 +470,7 @@ describe('DocArchiveManager', function () {
.stub()
.rejects(new Errors.NotFoundError())
await expect(
- DocArchiveManager.promises.unarchiveDoc(projectId, docId)
+ DocArchiveManager.unarchiveDoc(projectId, docId)
).to.eventually.be.rejected.and.be.instanceof(Errors.NotFoundError)
})
})
@@ -446,13 +478,11 @@ describe('DocArchiveManager', function () {
describe('destroyProject', function () {
describe('when archiving is enabled', function () {
beforeEach(async function () {
- await DocArchiveManager.promises.destroyProject(projectId)
+ await DocArchiveManager.destroyProject(projectId)
})
it('should delete the project in Mongo', function () {
- expect(MongoManager.promises.destroyProject).to.have.been.calledWith(
- projectId
- )
+ expect(MongoManager.destroyProject).to.have.been.calledWith(projectId)
})
it('should delete the project in the persistor', function () {
@@ -466,13 +496,11 @@ describe('DocArchiveManager', function () {
describe('when archiving is disabled', function () {
beforeEach(async function () {
Settings.docstore.backend = ''
- await DocArchiveManager.promises.destroyProject(projectId)
+ await DocArchiveManager.destroyProject(projectId)
})
it('should delete the project in Mongo', function () {
- expect(MongoManager.promises.destroyProject).to.have.been.calledWith(
- projectId
- )
+ expect(MongoManager.destroyProject).to.have.been.calledWith(projectId)
})
it('should not delete the project in the persistor', function () {
@@ -483,33 +511,35 @@ describe('DocArchiveManager', function () {
describe('archiveAllDocs', function () {
it('should resolve with valid arguments', async function () {
- await expect(DocArchiveManager.promises.archiveAllDocs(projectId)).to
- .eventually.be.fulfilled
+ await expect(DocArchiveManager.archiveAllDocs(projectId)).to.eventually.be
+ .fulfilled
})
it('should archive all project docs which are not in s3', async function () {
- await DocArchiveManager.promises.archiveAllDocs(projectId)
+ await DocArchiveManager.archiveAllDocs(projectId)
// not inS3
- expect(MongoManager.promises.markDocAsArchived).to.have.been.calledWith(
+ expect(MongoManager.markDocAsArchived).to.have.been.calledWith(
projectId,
mongoDocs[0]._id
)
- expect(MongoManager.promises.markDocAsArchived).to.have.been.calledWith(
+ expect(MongoManager.markDocAsArchived).to.have.been.calledWith(
projectId,
mongoDocs[1]._id
)
- expect(MongoManager.promises.markDocAsArchived).to.have.been.calledWith(
+ expect(MongoManager.markDocAsArchived).to.have.been.calledWith(
projectId,
mongoDocs[4]._id
)
// inS3
- expect(
- MongoManager.promises.markDocAsArchived
- ).not.to.have.been.calledWith(projectId, mongoDocs[2]._id)
- expect(
- MongoManager.promises.markDocAsArchived
- ).not.to.have.been.calledWith(projectId, mongoDocs[3]._id)
+ expect(MongoManager.markDocAsArchived).not.to.have.been.calledWith(
+ projectId,
+ mongoDocs[2]._id
+ )
+ expect(MongoManager.markDocAsArchived).not.to.have.been.calledWith(
+ projectId,
+ mongoDocs[3]._id
+ )
})
describe('when archiving is not configured', function () {
@@ -518,21 +548,20 @@ describe('DocArchiveManager', function () {
})
it('should bail out early', async function () {
- await DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[0]._id)
- expect(MongoManager.promises.getNonArchivedProjectDocIds).to.not.have
- .been.called
+ await DocArchiveManager.archiveDoc(projectId, mongoDocs[0]._id)
+ expect(MongoManager.getNonArchivedProjectDocIds).to.not.have.been.called
})
})
})
describe('unArchiveAllDocs', function () {
it('should resolve with valid arguments', async function () {
- await expect(DocArchiveManager.promises.unArchiveAllDocs(projectId)).to
- .eventually.be.fulfilled
+ await expect(DocArchiveManager.unArchiveAllDocs(projectId)).to.eventually
+ .be.fulfilled
})
it('should unarchive all inS3 docs', async function () {
- await DocArchiveManager.promises.unArchiveAllDocs(projectId)
+ await DocArchiveManager.unArchiveAllDocs(projectId)
for (const doc of archivedDocs) {
expect(PersistorManager.getObjectStream).to.have.been.calledWith(
@@ -548,9 +577,9 @@ describe('DocArchiveManager', function () {
})
it('should bail out early', async function () {
- await DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[0]._id)
- expect(MongoManager.promises.getNonDeletedArchivedProjectDocs).to.not
- .have.been.called
+ await DocArchiveManager.archiveDoc(projectId, mongoDocs[0]._id)
+ expect(MongoManager.getNonDeletedArchivedProjectDocs).to.not.have.been
+ .called
})
})
})
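
// A minimal sketch (not part of the patch) of the stubbing change applied
// throughout these docstore tests: the modules under test are now natively
// async, so the doubles drop the nested `promises` namespace. Values are
// illustrative only.
const sinon = require('sinon')

// before: collaborators were reached via MongoManager.promises.*
const mongoManagerBefore = {
  promises: { destroyProject: sinon.stub().resolves() },
}

// after: the flat async method is stubbed and awaited directly
const mongoManagerAfter = {
  destroyProject: sinon.stub().resolves(),
}
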
diff --git a/services/docstore/test/unit/js/DocManagerTests.js b/services/docstore/test/unit/js/DocManagerTests.js
index 8405520e6e..67a2f26547 100644
--- a/services/docstore/test/unit/js/DocManagerTests.js
+++ b/services/docstore/test/unit/js/DocManagerTests.js
@@ -17,25 +17,22 @@ describe('DocManager', function () {
this.version = 42
this.MongoManager = {
- promises: {
- findDoc: sinon.stub(),
- getProjectsDocs: sinon.stub(),
- patchDoc: sinon.stub().resolves(),
- upsertIntoDocCollection: sinon.stub().resolves(),
- },
+ findDoc: sinon.stub(),
+ getProjectsDocs: sinon.stub(),
+ patchDoc: sinon.stub().resolves(),
+ upsertIntoDocCollection: sinon.stub().resolves(),
}
this.DocArchiveManager = {
- promises: {
- unarchiveDoc: sinon.stub(),
- unArchiveAllDocs: sinon.stub(),
- archiveDoc: sinon.stub().resolves(),
- },
+ unarchiveDoc: sinon.stub(),
+ unArchiveAllDocs: sinon.stub(),
+ archiveDoc: sinon.stub().resolves(),
}
this.RangeManager = {
jsonRangesToMongo(r) {
return r
},
shouldUpdateRanges: sinon.stub().returns(false),
+ fixCommentIds: sinon.stub(),
}
this.settings = { docstore: {} }
@@ -52,7 +49,7 @@ describe('DocManager', function () {
describe('getFullDoc', function () {
beforeEach(function () {
- this.DocManager.promises._getDoc = sinon.stub()
+ this.DocManager._getDoc = sinon.stub()
this.doc = {
_id: this.doc_id,
lines: ['2134'],
@@ -60,13 +57,10 @@ describe('DocManager', function () {
})
it('should call get doc with a quick filter', async function () {
- this.DocManager.promises._getDoc.resolves(this.doc)
- const doc = await this.DocManager.promises.getFullDoc(
- this.project_id,
- this.doc_id
- )
+ this.DocManager._getDoc.resolves(this.doc)
+ const doc = await this.DocManager.getFullDoc(this.project_id, this.doc_id)
doc.should.equal(this.doc)
- this.DocManager.promises._getDoc
+ this.DocManager._getDoc
.calledWith(this.project_id, this.doc_id, {
lines: true,
rev: true,
@@ -79,27 +73,27 @@ describe('DocManager', function () {
})
it('should return error when get doc errors', async function () {
- this.DocManager.promises._getDoc.rejects(this.stubbedError)
+ this.DocManager._getDoc.rejects(this.stubbedError)
await expect(
- this.DocManager.promises.getFullDoc(this.project_id, this.doc_id)
+ this.DocManager.getFullDoc(this.project_id, this.doc_id)
).to.be.rejectedWith(this.stubbedError)
})
})
describe('getRawDoc', function () {
beforeEach(function () {
- this.DocManager.promises._getDoc = sinon.stub()
+ this.DocManager._getDoc = sinon.stub()
this.doc = { lines: ['2134'] }
})
it('should call get doc with a quick filter', async function () {
- this.DocManager.promises._getDoc.resolves(this.doc)
- const doc = await this.DocManager.promises.getDocLines(
+ this.DocManager._getDoc.resolves(this.doc)
+ const content = await this.DocManager.getDocLines(
this.project_id,
this.doc_id
)
- doc.should.equal(this.doc)
- this.DocManager.promises._getDoc
+ content.should.equal(this.doc.lines.join('\n'))
+ this.DocManager._getDoc
.calledWith(this.project_id, this.doc_id, {
lines: true,
inS3: true,
@@ -108,11 +102,46 @@ describe('DocManager', function () {
})
it('should return error when get doc errors', async function () {
- this.DocManager.promises._getDoc.rejects(this.stubbedError)
+ this.DocManager._getDoc.rejects(this.stubbedError)
await expect(
- this.DocManager.promises.getDocLines(this.project_id, this.doc_id)
+ this.DocManager.getDocLines(this.project_id, this.doc_id)
).to.be.rejectedWith(this.stubbedError)
})
+
+ it('should return error when get doc does not exist', async function () {
+ this.DocManager._getDoc.resolves(null)
+ await expect(
+ this.DocManager.getDocLines(this.project_id, this.doc_id)
+ ).to.be.rejectedWith(Errors.NotFoundError)
+ })
+
+ it('should return error when get doc has no lines', async function () {
+ this.DocManager._getDoc.resolves({})
+ await expect(
+ this.DocManager.getDocLines(this.project_id, this.doc_id)
+ ).to.be.rejectedWith(Errors.DocWithoutLinesError)
+ })
+ })
+
+ describe('_getDoc', function () {
+ it('should return error when get doc does not exist', async function () {
+ this.MongoManager.findDoc.resolves(null)
+ await expect(
+ this.DocManager._getDoc(this.project_id, this.doc_id, { inS3: true })
+ ).to.be.rejectedWith(Errors.NotFoundError)
+ })
+
+ it('should fix comment ids', async function () {
+ this.MongoManager.findDoc.resolves({
+ _id: this.doc_id,
+ ranges: {},
+ })
+ await this.DocManager._getDoc(this.project_id, this.doc_id, {
+ inS3: true,
+ ranges: true,
+ })
+ expect(this.RangeManager.fixCommentIds).to.have.been.called
+ })
})
describe('getDoc', function () {
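
// The new assertions above pin down the reworked getDocLines contract. A
// minimal sketch of what the implementation presumably looks like, inferred
// only from these tests (not part of the patch; the error messages and the
// exact emptiness check are assumptions, and DocManager and Errors refer to
// the docstore module's own imports):
async function getDocLines(projectId, docId) {
  const doc = await DocManager._getDoc(projectId, docId, {
    lines: true,
    inS3: true,
  })
  if (!doc) {
    throw new Errors.NotFoundError(`doc not found: ${docId}`)
  }
  if (!doc.lines) {
    throw new Errors.DocWithoutLinesError(`doc has no lines: ${docId}`)
  }
  return doc.lines.join('\n')
}
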
@@ -128,26 +157,25 @@ describe('DocManager', function () {
describe('when using a filter', function () {
beforeEach(function () {
- this.MongoManager.promises.findDoc.resolves(this.doc)
+ this.MongoManager.findDoc.resolves(this.doc)
})
it('should error if inS3 is not set to true', async function () {
await expect(
- this.DocManager.promises._getDoc(this.project_id, this.doc_id, {
+ this.DocManager._getDoc(this.project_id, this.doc_id, {
inS3: false,
})
).to.be.rejected
})
it('should always get inS3 even when no filter is passed', async function () {
- await expect(
- this.DocManager.promises._getDoc(this.project_id, this.doc_id)
- ).to.be.rejected
- this.MongoManager.promises.findDoc.called.should.equal(false)
+ await expect(this.DocManager._getDoc(this.project_id, this.doc_id)).to
+ .be.rejected
+ this.MongoManager.findDoc.called.should.equal(false)
})
it('should not error if inS3 is set to true', async function () {
- await this.DocManager.promises._getDoc(this.project_id, this.doc_id, {
+ await this.DocManager._getDoc(this.project_id, this.doc_id, {
inS3: true,
})
})
@@ -155,8 +183,8 @@ describe('DocManager', function () {
describe('when the doc is in the doc collection', function () {
beforeEach(async function () {
- this.MongoManager.promises.findDoc.resolves(this.doc)
- this.result = await this.DocManager.promises._getDoc(
+ this.MongoManager.findDoc.resolves(this.doc)
+ this.result = await this.DocManager._getDoc(
this.project_id,
this.doc_id,
{ version: true, inS3: true }
@@ -164,7 +192,7 @@ describe('DocManager', function () {
})
it('should get the doc from the doc collection', function () {
- this.MongoManager.promises.findDoc
+ this.MongoManager.findDoc
.calledWith(this.project_id, this.doc_id)
.should.equal(true)
})
@@ -177,9 +205,9 @@ describe('DocManager', function () {
describe('when MongoManager.findDoc errors', function () {
it('should return the error', async function () {
- this.MongoManager.promises.findDoc.rejects(this.stubbedError)
+ this.MongoManager.findDoc.rejects(this.stubbedError)
await expect(
- this.DocManager.promises._getDoc(this.project_id, this.doc_id, {
+ this.DocManager._getDoc(this.project_id, this.doc_id, {
version: true,
inS3: true,
})
@@ -202,15 +230,15 @@ describe('DocManager', function () {
version: 2,
inS3: false,
}
- this.MongoManager.promises.findDoc.resolves(this.doc)
- this.DocArchiveManager.promises.unarchiveDoc.callsFake(
+ this.MongoManager.findDoc.resolves(this.doc)
+ this.DocArchiveManager.unarchiveDoc.callsFake(
async (projectId, docId) => {
- this.MongoManager.promises.findDoc.resolves({
+ this.MongoManager.findDoc.resolves({
...this.unarchivedDoc,
})
}
)
- this.result = await this.DocManager.promises._getDoc(
+ this.result = await this.DocManager._getDoc(
this.project_id,
this.doc_id,
{
@@ -221,13 +249,13 @@ describe('DocManager', function () {
})
it('should call the DocArchive to unarchive the doc', function () {
- this.DocArchiveManager.promises.unarchiveDoc
+ this.DocArchiveManager.unarchiveDoc
.calledWith(this.project_id, this.doc_id)
.should.equal(true)
})
it('should look up the doc twice', function () {
- this.MongoManager.promises.findDoc.calledTwice.should.equal(true)
+ this.MongoManager.findDoc.calledTwice.should.equal(true)
})
it('should return the doc', function () {
@@ -239,9 +267,9 @@ describe('DocManager', function () {
describe('when the doc does not exist in the docs collection', function () {
it('should return a NotFoundError', async function () {
- this.MongoManager.promises.findDoc.resolves(null)
+ this.MongoManager.findDoc.resolves(null)
await expect(
- this.DocManager.promises._getDoc(this.project_id, this.doc_id, {
+ this.DocManager._getDoc(this.project_id, this.doc_id, {
version: true,
inS3: true,
})
@@ -262,23 +290,27 @@ describe('DocManager', function () {
lines: ['mock-lines'],
},
]
- this.MongoManager.promises.getProjectsDocs.resolves(this.docs)
- this.DocArchiveManager.promises.unArchiveAllDocs.resolves(this.docs)
- this.filter = { lines: true }
- this.result = await this.DocManager.promises.getAllNonDeletedDocs(
+ this.MongoManager.getProjectsDocs.resolves(this.docs)
+ this.DocArchiveManager.unArchiveAllDocs.resolves(this.docs)
+ this.filter = { lines: true, ranges: true }
+ this.result = await this.DocManager.getAllNonDeletedDocs(
this.project_id,
this.filter
)
})
it('should get the project from the database', function () {
- this.MongoManager.promises.getProjectsDocs.should.have.been.calledWith(
+ this.MongoManager.getProjectsDocs.should.have.been.calledWith(
this.project_id,
{ include_deleted: false },
this.filter
)
})
+ it('should fix comment ids', async function () {
+ expect(this.RangeManager.fixCommentIds).to.have.been.called
+ })
+
it('should return the docs', function () {
expect(this.result).to.deep.equal(this.docs)
})
@@ -286,13 +318,10 @@ describe('DocManager', function () {
describe('when there are no docs for the project', function () {
it('should return a NotFoundError', async function () {
- this.MongoManager.promises.getProjectsDocs.resolves(null)
- this.DocArchiveManager.promises.unArchiveAllDocs.resolves(null)
+ this.MongoManager.getProjectsDocs.resolves(null)
+ this.DocArchiveManager.unArchiveAllDocs.resolves(null)
await expect(
- this.DocManager.promises.getAllNonDeletedDocs(
- this.project_id,
- this.filter
- )
+ this.DocManager.getAllNonDeletedDocs(this.project_id, this.filter)
).to.be.rejectedWith(`No docs for project ${this.project_id}`)
})
})
@@ -303,7 +332,7 @@ describe('DocManager', function () {
beforeEach(function () {
this.lines = ['mock', 'doc', 'lines']
this.rev = 77
- this.MongoManager.promises.findDoc.resolves({
+ this.MongoManager.findDoc.resolves({
_id: new ObjectId(this.doc_id),
})
this.meta = {}
@@ -311,7 +340,7 @@ describe('DocManager', function () {
describe('standard path', function () {
beforeEach(async function () {
- await this.DocManager.promises.patchDoc(
+ await this.DocManager.patchDoc(
this.project_id,
this.doc_id,
this.meta
@@ -319,14 +348,14 @@ describe('DocManager', function () {
})
it('should get the doc', function () {
- expect(this.MongoManager.promises.findDoc).to.have.been.calledWith(
+ expect(this.MongoManager.findDoc).to.have.been.calledWith(
this.project_id,
this.doc_id
)
})
it('should persist the meta', function () {
- expect(this.MongoManager.promises.patchDoc).to.have.been.calledWith(
+ expect(this.MongoManager.patchDoc).to.have.been.calledWith(
this.project_id,
this.doc_id,
this.meta
@@ -339,7 +368,7 @@ describe('DocManager', function () {
this.settings.docstore.archiveOnSoftDelete = false
this.meta.deleted = true
- await this.DocManager.promises.patchDoc(
+ await this.DocManager.patchDoc(
this.project_id,
this.doc_id,
this.meta
@@ -347,8 +376,7 @@ describe('DocManager', function () {
})
it('should not flush the doc out of mongo', function () {
- expect(this.DocArchiveManager.promises.archiveDoc).to.not.have.been
- .called
+ expect(this.DocArchiveManager.archiveDoc).to.not.have.been.called
})
})
@@ -356,7 +384,7 @@ describe('DocManager', function () {
beforeEach(async function () {
this.settings.docstore.archiveOnSoftDelete = false
this.meta.deleted = false
- await this.DocManager.promises.patchDoc(
+ await this.DocManager.patchDoc(
this.project_id,
this.doc_id,
this.meta
@@ -364,8 +392,7 @@ describe('DocManager', function () {
})
it('should not flush the doc out of mongo', function () {
- expect(this.DocArchiveManager.promises.archiveDoc).to.not.have.been
- .called
+ expect(this.DocArchiveManager.archiveDoc).to.not.have.been.called
})
})
@@ -377,7 +404,7 @@ describe('DocManager', function () {
describe('when the background flush succeeds', function () {
beforeEach(async function () {
- await this.DocManager.promises.patchDoc(
+ await this.DocManager.patchDoc(
this.project_id,
this.doc_id,
this.meta
@@ -389,17 +416,18 @@ describe('DocManager', function () {
})
it('should flush the doc out of mongo', function () {
- expect(
- this.DocArchiveManager.promises.archiveDoc
- ).to.have.been.calledWith(this.project_id, this.doc_id)
+ expect(this.DocArchiveManager.archiveDoc).to.have.been.calledWith(
+ this.project_id,
+ this.doc_id
+ )
})
})
describe('when the background flush fails', function () {
beforeEach(async function () {
this.err = new Error('foo')
- this.DocArchiveManager.promises.archiveDoc.rejects(this.err)
- await this.DocManager.promises.patchDoc(
+ this.DocArchiveManager.archiveDoc.rejects(this.err)
+ await this.DocManager.patchDoc(
this.project_id,
this.doc_id,
this.meta
@@ -422,9 +450,9 @@ describe('DocManager', function () {
describe('when the doc does not exist', function () {
it('should return a NotFoundError', async function () {
- this.MongoManager.promises.findDoc.resolves(null)
+ this.MongoManager.findDoc.resolves(null)
await expect(
- this.DocManager.promises.patchDoc(this.project_id, this.doc_id, {})
+ this.DocManager.patchDoc(this.project_id, this.doc_id, {})
).to.be.rejectedWith(
`No such project/doc to delete: ${this.project_id}/${this.doc_id}`
)
@@ -470,13 +498,13 @@ describe('DocManager', function () {
ranges: this.originalRanges,
}
- this.DocManager.promises._getDoc = sinon.stub()
+ this.DocManager._getDoc = sinon.stub()
})
describe('when only the doc lines have changed', function () {
beforeEach(async function () {
- this.DocManager.promises._getDoc = sinon.stub().resolves(this.doc)
- this.result = await this.DocManager.promises.updateDoc(
+ this.DocManager._getDoc = sinon.stub().resolves(this.doc)
+ this.result = await this.DocManager.updateDoc(
this.project_id,
this.doc_id,
this.newDocLines,
@@ -486,7 +514,7 @@ describe('DocManager', function () {
})
it('should get the existing doc', function () {
- this.DocManager.promises._getDoc
+ this.DocManager._getDoc
.calledWith(this.project_id, this.doc_id, {
version: true,
rev: true,
@@ -498,7 +526,7 @@ describe('DocManager', function () {
})
it('should upsert the document to the doc collection', function () {
- this.MongoManager.promises.upsertIntoDocCollection
+ this.MongoManager.upsertIntoDocCollection
.calledWith(this.project_id, this.doc_id, this.rev, {
lines: this.newDocLines,
})
@@ -512,9 +540,9 @@ describe('DocManager', function () {
describe('when the doc ranges have changed', function () {
beforeEach(async function () {
- this.DocManager.promises._getDoc = sinon.stub().resolves(this.doc)
+ this.DocManager._getDoc = sinon.stub().resolves(this.doc)
this.RangeManager.shouldUpdateRanges.returns(true)
- this.result = await this.DocManager.promises.updateDoc(
+ this.result = await this.DocManager.updateDoc(
this.project_id,
this.doc_id,
this.oldDocLines,
@@ -524,7 +552,7 @@ describe('DocManager', function () {
})
it('should upsert the ranges', function () {
- this.MongoManager.promises.upsertIntoDocCollection
+ this.MongoManager.upsertIntoDocCollection
.calledWith(this.project_id, this.doc_id, this.rev, {
ranges: this.newRanges,
})
@@ -538,8 +566,8 @@ describe('DocManager', function () {
describe('when only the version has changed', function () {
beforeEach(async function () {
- this.DocManager.promises._getDoc = sinon.stub().resolves(this.doc)
- this.result = await this.DocManager.promises.updateDoc(
+ this.DocManager._getDoc = sinon.stub().resolves(this.doc)
+ this.result = await this.DocManager.updateDoc(
this.project_id,
this.doc_id,
this.oldDocLines,
@@ -549,7 +577,7 @@ describe('DocManager', function () {
})
it('should update the version', function () {
- this.MongoManager.promises.upsertIntoDocCollection.should.have.been.calledWith(
+ this.MongoManager.upsertIntoDocCollection.should.have.been.calledWith(
this.project_id,
this.doc_id,
this.rev,
@@ -564,8 +592,8 @@ describe('DocManager', function () {
describe('when the doc has not changed at all', function () {
beforeEach(async function () {
- this.DocManager.promises._getDoc = sinon.stub().resolves(this.doc)
- this.result = await this.DocManager.promises.updateDoc(
+ this.DocManager._getDoc = sinon.stub().resolves(this.doc)
+ this.result = await this.DocManager.updateDoc(
this.project_id,
this.doc_id,
this.oldDocLines,
@@ -575,9 +603,7 @@ describe('DocManager', function () {
})
it('should not update the ranges or lines or version', function () {
- this.MongoManager.promises.upsertIntoDocCollection.called.should.equal(
- false
- )
+ this.MongoManager.upsertIntoDocCollection.called.should.equal(false)
})
it('should return the old rev and modified == false', function () {
@@ -588,7 +614,7 @@ describe('DocManager', function () {
describe('when the version is null', function () {
it('should return an error', async function () {
await expect(
- this.DocManager.promises.updateDoc(
+ this.DocManager.updateDoc(
this.project_id,
this.doc_id,
this.newDocLines,
@@ -602,7 +628,7 @@ describe('DocManager', function () {
describe('when the lines are null', function () {
it('should return an error', async function () {
await expect(
- this.DocManager.promises.updateDoc(
+ this.DocManager.updateDoc(
this.project_id,
this.doc_id,
null,
@@ -616,7 +642,7 @@ describe('DocManager', function () {
describe('when the ranges are null', function () {
it('should return an error', async function () {
await expect(
- this.DocManager.promises.updateDoc(
+ this.DocManager.updateDoc(
this.project_id,
this.doc_id,
this.newDocLines,
@@ -630,9 +656,9 @@ describe('DocManager', function () {
describe('when there is a generic error getting the doc', function () {
beforeEach(async function () {
this.error = new Error('doc could not be found')
- this.DocManager.promises._getDoc = sinon.stub().rejects(this.error)
+ this.DocManager._getDoc = sinon.stub().rejects(this.error)
await expect(
- this.DocManager.promises.updateDoc(
+ this.DocManager.updateDoc(
this.project_id,
this.doc_id,
this.newDocLines,
@@ -643,16 +669,15 @@ describe('DocManager', function () {
})
it('should not upsert the document to the doc collection', function () {
- this.MongoManager.promises.upsertIntoDocCollection.should.not.have.been
- .called
+ this.MongoManager.upsertIntoDocCollection.should.not.have.been.called
})
})
describe('when the version was decremented', function () {
it('should return an error', async function () {
- this.DocManager.promises._getDoc = sinon.stub().resolves(this.doc)
+ this.DocManager._getDoc = sinon.stub().resolves(this.doc)
await expect(
- this.DocManager.promises.updateDoc(
+ this.DocManager.updateDoc(
this.project_id,
this.doc_id,
this.newDocLines,
@@ -665,8 +690,8 @@ describe('DocManager', function () {
describe('when the doc lines have not changed', function () {
beforeEach(async function () {
- this.DocManager.promises._getDoc = sinon.stub().resolves(this.doc)
- this.result = await this.DocManager.promises.updateDoc(
+ this.DocManager._getDoc = sinon.stub().resolves(this.doc)
+ this.result = await this.DocManager.updateDoc(
this.project_id,
this.doc_id,
this.oldDocLines.slice(),
@@ -676,9 +701,7 @@ describe('DocManager', function () {
})
it('should not update the doc', function () {
- this.MongoManager.promises.upsertIntoDocCollection.called.should.equal(
- false
- )
+ this.MongoManager.upsertIntoDocCollection.called.should.equal(false)
})
it('should return the existing rev', function () {
@@ -688,8 +711,8 @@ describe('DocManager', function () {
describe('when the doc does not exist', function () {
beforeEach(async function () {
- this.DocManager.promises._getDoc = sinon.stub().resolves(null)
- this.result = await this.DocManager.promises.updateDoc(
+ this.DocManager._getDoc = sinon.stub().resolves(null)
+ this.result = await this.DocManager.updateDoc(
this.project_id,
this.doc_id,
this.newDocLines,
@@ -699,7 +722,7 @@ describe('DocManager', function () {
})
it('should upsert the document to the doc collection', function () {
- this.MongoManager.promises.upsertIntoDocCollection.should.have.been.calledWith(
+ this.MongoManager.upsertIntoDocCollection.should.have.been.calledWith(
this.project_id,
this.doc_id,
undefined,
@@ -718,12 +741,12 @@ describe('DocManager', function () {
describe('when another update is racing', function () {
beforeEach(async function () {
- this.DocManager.promises._getDoc = sinon.stub().resolves(this.doc)
- this.MongoManager.promises.upsertIntoDocCollection
+ this.DocManager._getDoc = sinon.stub().resolves(this.doc)
+ this.MongoManager.upsertIntoDocCollection
.onFirstCall()
.rejects(new Errors.DocRevValueError())
this.RangeManager.shouldUpdateRanges.returns(true)
- this.result = await this.DocManager.promises.updateDoc(
+ this.result = await this.DocManager.updateDoc(
this.project_id,
this.doc_id,
this.newDocLines,
@@ -733,7 +756,7 @@ describe('DocManager', function () {
})
it('should upsert the doc twice', function () {
- this.MongoManager.promises.upsertIntoDocCollection.should.have.been.calledWith(
+ this.MongoManager.upsertIntoDocCollection.should.have.been.calledWith(
this.project_id,
this.doc_id,
this.rev,
@@ -743,8 +766,7 @@ describe('DocManager', function () {
version: this.version + 1,
}
)
- this.MongoManager.promises.upsertIntoDocCollection.should.have.been
- .calledTwice
+ this.MongoManager.upsertIntoDocCollection.should.have.been.calledTwice
})
it('should return the new rev', function () {
diff --git a/services/docstore/test/unit/js/HttpControllerTests.js b/services/docstore/test/unit/js/HttpControllerTests.js
index bf78696890..ab491ec150 100644
--- a/services/docstore/test/unit/js/HttpControllerTests.js
+++ b/services/docstore/test/unit/js/HttpControllerTests.js
@@ -14,7 +14,7 @@ describe('HttpController', function () {
max_doc_length: 2 * 1024 * 1024,
}
this.DocArchiveManager = {
- unArchiveAllDocs: sinon.stub().yields(),
+ unArchiveAllDocs: sinon.stub().returns(),
}
this.DocManager = {}
this.HttpController = SandboxedModule.require(modulePath, {
@@ -54,15 +54,13 @@ describe('HttpController', function () {
describe('getDoc', function () {
describe('without deleted docs', function () {
- beforeEach(function () {
+ beforeEach(async function () {
this.req.params = {
project_id: this.projectId,
doc_id: this.docId,
}
- this.DocManager.getFullDoc = sinon
- .stub()
- .callsArgWith(2, null, this.doc)
- this.HttpController.getDoc(this.req, this.res, this.next)
+ this.DocManager.getFullDoc = sinon.stub().resolves(this.doc)
+ await this.HttpController.getDoc(this.req, this.res, this.next)
})
it('should get the document with the version (including deleted)', function () {
@@ -89,26 +87,24 @@ describe('HttpController', function () {
project_id: this.projectId,
doc_id: this.docId,
}
- this.DocManager.getFullDoc = sinon
- .stub()
- .callsArgWith(2, null, this.deletedDoc)
+ this.DocManager.getFullDoc = sinon.stub().resolves(this.deletedDoc)
})
- it('should get the doc from the doc manager', function () {
- this.HttpController.getDoc(this.req, this.res, this.next)
+ it('should get the doc from the doc manager', async function () {
+ await this.HttpController.getDoc(this.req, this.res, this.next)
this.DocManager.getFullDoc
.calledWith(this.projectId, this.docId)
.should.equal(true)
})
- it('should return 404 if the query string delete is not set ', function () {
- this.HttpController.getDoc(this.req, this.res, this.next)
+ it('should return 404 if the query string delete is not set ', async function () {
+ await this.HttpController.getDoc(this.req, this.res, this.next)
this.res.sendStatus.calledWith(404).should.equal(true)
})
- it('should return the doc as JSON if include_deleted is set to true', function () {
+ it('should return the doc as JSON if include_deleted is set to true', async function () {
this.req.query.include_deleted = 'true'
- this.HttpController.getDoc(this.req, this.res, this.next)
+ await this.HttpController.getDoc(this.req, this.res, this.next)
this.res.json
.calledWith({
_id: this.docId,
@@ -123,13 +119,15 @@ describe('HttpController', function () {
})
describe('getRawDoc', function () {
- beforeEach(function () {
+ beforeEach(async function () {
this.req.params = {
project_id: this.projectId,
doc_id: this.docId,
}
- this.DocManager.getDocLines = sinon.stub().callsArgWith(2, null, this.doc)
- this.HttpController.getRawDoc(this.req, this.res, this.next)
+ this.DocManager.getDocLines = sinon
+ .stub()
+ .resolves(this.doc.lines.join('\n'))
+ await this.HttpController.getRawDoc(this.req, this.res, this.next)
})
it('should get the document without the version', function () {
@@ -154,7 +152,7 @@ describe('HttpController', function () {
describe('getAllDocs', function () {
describe('normally', function () {
- beforeEach(function () {
+ beforeEach(async function () {
this.req.params = { project_id: this.projectId }
this.docs = [
{
@@ -168,10 +166,8 @@ describe('HttpController', function () {
rev: 4,
},
]
- this.DocManager.getAllNonDeletedDocs = sinon
- .stub()
- .callsArgWith(2, null, this.docs)
- this.HttpController.getAllDocs(this.req, this.res, this.next)
+ this.DocManager.getAllNonDeletedDocs = sinon.stub().resolves(this.docs)
+ await this.HttpController.getAllDocs(this.req, this.res, this.next)
})
it('should get all the (non-deleted) docs', function () {
@@ -199,7 +195,7 @@ describe('HttpController', function () {
})
describe('with null lines', function () {
- beforeEach(function () {
+ beforeEach(async function () {
this.req.params = { project_id: this.projectId }
this.docs = [
{
@@ -213,10 +209,8 @@ describe('HttpController', function () {
rev: 4,
},
]
- this.DocManager.getAllNonDeletedDocs = sinon
- .stub()
- .callsArgWith(2, null, this.docs)
- this.HttpController.getAllDocs(this.req, this.res, this.next)
+ this.DocManager.getAllNonDeletedDocs = sinon.stub().resolves(this.docs)
+ await this.HttpController.getAllDocs(this.req, this.res, this.next)
})
it('should return the doc with fallback lines', function () {
@@ -238,7 +232,7 @@ describe('HttpController', function () {
})
describe('with a null doc', function () {
- beforeEach(function () {
+ beforeEach(async function () {
this.req.params = { project_id: this.projectId }
this.docs = [
{
@@ -253,10 +247,8 @@ describe('HttpController', function () {
rev: 4,
},
]
- this.DocManager.getAllNonDeletedDocs = sinon
- .stub()
- .callsArgWith(2, null, this.docs)
- this.HttpController.getAllDocs(this.req, this.res, this.next)
+ this.DocManager.getAllNonDeletedDocs = sinon.stub().resolves(this.docs)
+ await this.HttpController.getAllDocs(this.req, this.res, this.next)
})
it('should return the non null docs as JSON', function () {
@@ -292,7 +284,7 @@ describe('HttpController', function () {
describe('getAllRanges', function () {
describe('normally', function () {
- beforeEach(function () {
+ beforeEach(async function () {
this.req.params = { project_id: this.projectId }
this.docs = [
{
@@ -304,10 +296,8 @@ describe('HttpController', function () {
ranges: { mock_ranges: 'two' },
},
]
- this.DocManager.getAllNonDeletedDocs = sinon
- .stub()
- .callsArgWith(2, null, this.docs)
- this.HttpController.getAllRanges(this.req, this.res, this.next)
+ this.DocManager.getAllNonDeletedDocs = sinon.stub().resolves(this.docs)
+ await this.HttpController.getAllRanges(this.req, this.res, this.next)
})
it('should get all the (non-deleted) doc ranges', function () {
@@ -342,16 +332,17 @@ describe('HttpController', function () {
})
describe('when the doc lines exist and were updated', function () {
- beforeEach(function () {
+ beforeEach(async function () {
this.req.body = {
lines: (this.lines = ['hello', 'world']),
version: (this.version = 42),
ranges: (this.ranges = { changes: 'mock' }),
}
+ this.rev = 5
this.DocManager.updateDoc = sinon
.stub()
- .yields(null, true, (this.rev = 5))
- this.HttpController.updateDoc(this.req, this.res, this.next)
+ .resolves({ modified: true, rev: this.rev })
+ await this.HttpController.updateDoc(this.req, this.res, this.next)
})
it('should update the document', function () {
@@ -374,16 +365,17 @@ describe('HttpController', function () {
})
describe('when the doc lines exist and were not updated', function () {
- beforeEach(function () {
+ beforeEach(async function () {
this.req.body = {
lines: (this.lines = ['hello', 'world']),
version: (this.version = 42),
ranges: {},
}
+ this.rev = 5
this.DocManager.updateDoc = sinon
.stub()
- .yields(null, false, (this.rev = 5))
- this.HttpController.updateDoc(this.req, this.res, this.next)
+ .resolves({ modified: false, rev: this.rev })
+ await this.HttpController.updateDoc(this.req, this.res, this.next)
})
it('should return a modified status', function () {
@@ -394,10 +386,12 @@ describe('HttpController', function () {
})
describe('when the doc lines are not provided', function () {
- beforeEach(function () {
+ beforeEach(async function () {
this.req.body = { version: 42, ranges: {} }
- this.DocManager.updateDoc = sinon.stub().yields(null, false)
- this.HttpController.updateDoc(this.req, this.res, this.next)
+ this.DocManager.updateDoc = sinon
+ .stub()
+ .resolves({ modified: false, rev: 0 })
+ await this.HttpController.updateDoc(this.req, this.res, this.next)
})
it('should not update the document', function () {
@@ -410,10 +404,12 @@ describe('HttpController', function () {
})
describe('when the doc version are not provided', function () {
- beforeEach(function () {
+ beforeEach(async function () {
this.req.body = { version: 42, lines: ['hello world'] }
- this.DocManager.updateDoc = sinon.stub().yields(null, false)
- this.HttpController.updateDoc(this.req, this.res, this.next)
+ this.DocManager.updateDoc = sinon
+ .stub()
+ .resolves({ modified: false, rev: 0 })
+ await this.HttpController.updateDoc(this.req, this.res, this.next)
})
it('should not update the document', function () {
@@ -426,10 +422,12 @@ describe('HttpController', function () {
})
describe('when the doc ranges is not provided', function () {
- beforeEach(function () {
+ beforeEach(async function () {
this.req.body = { lines: ['foo'], version: 42 }
- this.DocManager.updateDoc = sinon.stub().yields(null, false)
- this.HttpController.updateDoc(this.req, this.res, this.next)
+ this.DocManager.updateDoc = sinon
+ .stub()
+ .resolves({ modified: false, rev: 0 })
+ await this.HttpController.updateDoc(this.req, this.res, this.next)
})
it('should not update the document', function () {
@@ -442,13 +440,20 @@ describe('HttpController', function () {
})
describe('when the doc body is too large', function () {
- beforeEach(function () {
+ beforeEach(async function () {
this.req.body = {
lines: (this.lines = Array(2049).fill('a'.repeat(1024))),
version: (this.version = 42),
ranges: (this.ranges = { changes: 'mock' }),
}
- this.HttpController.updateDoc(this.req, this.res, this.next)
+ this.DocManager.updateDoc = sinon
+ .stub()
+ .resolves({ modified: false, rev: 0 })
+ await this.HttpController.updateDoc(this.req, this.res, this.next)
+ })
+
+ it('should not update the document', function () {
+ this.DocManager.updateDoc.called.should.equal(false)
})
it('should return a 413 (too large) response', function () {
@@ -462,14 +467,14 @@ describe('HttpController', function () {
})
describe('patchDoc', function () {
- beforeEach(function () {
+ beforeEach(async function () {
this.req.params = {
project_id: this.projectId,
doc_id: this.docId,
}
this.req.body = { name: 'foo.tex' }
- this.DocManager.patchDoc = sinon.stub().yields(null)
- this.HttpController.patchDoc(this.req, this.res, this.next)
+ this.DocManager.patchDoc = sinon.stub().resolves()
+ await this.HttpController.patchDoc(this.req, this.res, this.next)
})
it('should delete the document', function () {
@@ -484,11 +489,11 @@ describe('HttpController', function () {
})
describe('with an invalid payload', function () {
- beforeEach(function () {
+ beforeEach(async function () {
this.req.body = { cannot: 'happen' }
- this.DocManager.patchDoc = sinon.stub().yields(null)
- this.HttpController.patchDoc(this.req, this.res, this.next)
+ this.DocManager.patchDoc = sinon.stub().resolves()
+ await this.HttpController.patchDoc(this.req, this.res, this.next)
})
it('should log a message', function () {
@@ -509,10 +514,10 @@ describe('HttpController', function () {
})
describe('archiveAllDocs', function () {
- beforeEach(function () {
+ beforeEach(async function () {
this.req.params = { project_id: this.projectId }
- this.DocArchiveManager.archiveAllDocs = sinon.stub().callsArg(1)
- this.HttpController.archiveAllDocs(this.req, this.res, this.next)
+ this.DocArchiveManager.archiveAllDocs = sinon.stub().resolves()
+ await this.HttpController.archiveAllDocs(this.req, this.res, this.next)
})
it('should archive the project', function () {
@@ -532,9 +537,12 @@ describe('HttpController', function () {
})
describe('on success', function () {
- beforeEach(function (done) {
- this.res.sendStatus.callsFake(() => done())
- this.HttpController.unArchiveAllDocs(this.req, this.res, this.next)
+ beforeEach(async function () {
+ await this.HttpController.unArchiveAllDocs(
+ this.req,
+ this.res,
+ this.next
+ )
})
it('returns a 200', function () {
@@ -543,12 +551,15 @@ describe('HttpController', function () {
})
describe("when the archived rev doesn't match", function () {
- beforeEach(function (done) {
- this.res.sendStatus.callsFake(() => done())
- this.DocArchiveManager.unArchiveAllDocs.yields(
+ beforeEach(async function () {
+ this.DocArchiveManager.unArchiveAllDocs.rejects(
new Errors.DocRevValueError('bad rev')
)
- this.HttpController.unArchiveAllDocs(this.req, this.res, this.next)
+ await this.HttpController.unArchiveAllDocs(
+ this.req,
+ this.res,
+ this.next
+ )
})
it('returns a 409', function () {
@@ -558,10 +569,10 @@ describe('HttpController', function () {
})
describe('destroyProject', function () {
- beforeEach(function () {
+ beforeEach(async function () {
this.req.params = { project_id: this.projectId }
- this.DocArchiveManager.destroyProject = sinon.stub().callsArg(1)
- this.HttpController.destroyProject(this.req, this.res, this.next)
+ this.DocArchiveManager.destroyProject = sinon.stub().resolves()
+ await this.HttpController.destroyProject(this.req, this.res, this.next)
})
it('should destroy the docs', function () {
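
// The recurring pattern in this controller test file, shown in isolation
// (not part of the patch): collaborators that used to be stubbed with
// callback semantics are now stubbed as promises, and each beforeEach
// awaits the controller directly.
const sinon = require('sinon')

// old style: invoke the third argument as (err, doc)
const getFullDocOld = sinon.stub().callsArgWith(2, null, { _id: 'doc' })

// new style: resolve or reject, matching the async DocManager API; note
// that updateDoc now resolves an object instead of yielding (err, modified, rev)
const getFullDocNew = sinon.stub().resolves({ _id: 'doc' })
const updateDocNew = sinon.stub().resolves({ modified: true, rev: 5 })
const failingStub = sinon.stub().rejects(new Error('boom'))
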
diff --git a/services/docstore/test/unit/js/MongoManagerTests.js b/services/docstore/test/unit/js/MongoManagerTests.js
index 4f8467db76..b96b661df4 100644
--- a/services/docstore/test/unit/js/MongoManagerTests.js
+++ b/services/docstore/test/unit/js/MongoManagerTests.js
@@ -41,7 +41,7 @@ describe('MongoManager', function () {
this.doc = { name: 'mock-doc' }
this.db.docs.findOne = sinon.stub().resolves(this.doc)
this.filter = { lines: true }
- this.result = await this.MongoManager.promises.findDoc(
+ this.result = await this.MongoManager.findDoc(
this.projectId,
this.docId,
this.filter
@@ -70,11 +70,7 @@ describe('MongoManager', function () {
describe('patchDoc', function () {
beforeEach(async function () {
this.meta = { name: 'foo.tex' }
- await this.MongoManager.promises.patchDoc(
- this.projectId,
- this.docId,
- this.meta
- )
+ await this.MongoManager.patchDoc(this.projectId, this.docId, this.meta)
})
it('should pass the parameter along', function () {
@@ -104,7 +100,7 @@ describe('MongoManager', function () {
describe('with included_deleted = false', function () {
beforeEach(async function () {
- this.result = await this.MongoManager.promises.getProjectsDocs(
+ this.result = await this.MongoManager.getProjectsDocs(
this.projectId,
{ include_deleted: false },
this.filter
@@ -132,7 +128,7 @@ describe('MongoManager', function () {
describe('with included_deleted = true', function () {
beforeEach(async function () {
- this.result = await this.MongoManager.promises.getProjectsDocs(
+ this.result = await this.MongoManager.getProjectsDocs(
this.projectId,
{ include_deleted: true },
this.filter
@@ -167,7 +163,7 @@ describe('MongoManager', function () {
this.db.docs.find = sinon.stub().returns({
toArray: sinon.stub().resolves([this.doc1, this.doc2, this.doc3]),
})
- this.result = await this.MongoManager.promises.getProjectsDeletedDocs(
+ this.result = await this.MongoManager.getProjectsDeletedDocs(
this.projectId,
this.filter
)
@@ -203,7 +199,7 @@ describe('MongoManager', function () {
})
it('should upsert the document', async function () {
- await this.MongoManager.promises.upsertIntoDocCollection(
+ await this.MongoManager.upsertIntoDocCollection(
this.projectId,
this.docId,
this.oldRev,
@@ -223,7 +219,7 @@ describe('MongoManager', function () {
it('should handle update error', async function () {
this.db.docs.updateOne.rejects(this.stubbedErr)
await expect(
- this.MongoManager.promises.upsertIntoDocCollection(
+ this.MongoManager.upsertIntoDocCollection(
this.projectId,
this.docId,
this.rev,
@@ -235,7 +231,7 @@ describe('MongoManager', function () {
})
it('should insert without a previous rev', async function () {
- await this.MongoManager.promises.upsertIntoDocCollection(
+ await this.MongoManager.upsertIntoDocCollection(
this.projectId,
this.docId,
null,
@@ -254,7 +250,7 @@ describe('MongoManager', function () {
it('should handle generic insert error', async function () {
this.db.docs.insertOne.rejects(this.stubbedErr)
await expect(
- this.MongoManager.promises.upsertIntoDocCollection(
+ this.MongoManager.upsertIntoDocCollection(
this.projectId,
this.docId,
null,
@@ -266,7 +262,7 @@ describe('MongoManager', function () {
it('should handle duplicate insert error', async function () {
this.db.docs.insertOne.rejects({ code: 11000 })
await expect(
- this.MongoManager.promises.upsertIntoDocCollection(
+ this.MongoManager.upsertIntoDocCollection(
this.projectId,
this.docId,
null,
@@ -280,7 +276,7 @@ describe('MongoManager', function () {
beforeEach(async function () {
this.projectId = new ObjectId()
this.db.docs.deleteMany = sinon.stub().resolves()
- await this.MongoManager.promises.destroyProject(this.projectId)
+ await this.MongoManager.destroyProject(this.projectId)
})
it('should destroy all docs', function () {
@@ -297,13 +293,13 @@ describe('MongoManager', function () {
it('should not error when the rev has not changed', async function () {
this.db.docs.findOne = sinon.stub().resolves({ rev: 1 })
- await this.MongoManager.promises.checkRevUnchanged(this.doc)
+ await this.MongoManager.checkRevUnchanged(this.doc)
})
it('should return an error when the rev has changed', async function () {
this.db.docs.findOne = sinon.stub().resolves({ rev: 2 })
await expect(
- this.MongoManager.promises.checkRevUnchanged(this.doc)
+ this.MongoManager.checkRevUnchanged(this.doc)
).to.be.rejectedWith(Errors.DocModifiedError)
})
@@ -311,14 +307,14 @@ describe('MongoManager', function () {
this.db.docs.findOne = sinon.stub().resolves({ rev: 2 })
this.doc = { _id: new ObjectId(), name: 'mock-doc', rev: NaN }
await expect(
- this.MongoManager.promises.checkRevUnchanged(this.doc)
+ this.MongoManager.checkRevUnchanged(this.doc)
).to.be.rejectedWith(Errors.DocRevValueError)
})
it('should return a value error if checked doc rev is NaN', async function () {
this.db.docs.findOne = sinon.stub().resolves({ rev: NaN })
await expect(
- this.MongoManager.promises.checkRevUnchanged(this.doc)
+ this.MongoManager.checkRevUnchanged(this.doc)
).to.be.rejectedWith(Errors.DocRevValueError)
})
})
@@ -334,7 +330,7 @@ describe('MongoManager', function () {
describe('complete doc', function () {
beforeEach(async function () {
- await this.MongoManager.promises.restoreArchivedDoc(
+ await this.MongoManager.restoreArchivedDoc(
this.projectId,
this.docId,
this.archivedDoc
@@ -364,7 +360,7 @@ describe('MongoManager', function () {
describe('without ranges', function () {
beforeEach(async function () {
delete this.archivedDoc.ranges
- await this.MongoManager.promises.restoreArchivedDoc(
+ await this.MongoManager.restoreArchivedDoc(
this.projectId,
this.docId,
this.archivedDoc
@@ -395,7 +391,7 @@ describe('MongoManager', function () {
it('throws a DocRevValueError', async function () {
this.db.docs.updateOne.resolves({ matchedCount: 0 })
await expect(
- this.MongoManager.promises.restoreArchivedDoc(
+ this.MongoManager.restoreArchivedDoc(
this.projectId,
this.docId,
this.archivedDoc
diff --git a/services/docstore/test/unit/js/RangeManagerTests.js b/services/docstore/test/unit/js/RangeManagerTests.js
index 7a2de7352e..ba99280a7a 100644
--- a/services/docstore/test/unit/js/RangeManagerTests.js
+++ b/services/docstore/test/unit/js/RangeManagerTests.js
@@ -30,7 +30,7 @@ describe('RangeManager', function () {
})
describe('jsonRangesToMongo', function () {
- it('should convert ObjectIds and dates to proper objects', function () {
+ it('should convert ObjectIds and dates to proper objects and fix comment id', function () {
const changeId = new ObjectId().toString()
const commentId = new ObjectId().toString()
const userId = new ObjectId().toString()
@@ -66,7 +66,7 @@ describe('RangeManager', function () {
],
comments: [
{
- id: new ObjectId(commentId),
+ id: new ObjectId(threadId),
op: { c: 'foo', p: 3, t: new ObjectId(threadId) },
},
],
@@ -110,7 +110,6 @@ describe('RangeManager', function () {
return it('should be consistent when transformed through json -> mongo -> json', function () {
const changeId = new ObjectId().toString()
- const commentId = new ObjectId().toString()
const userId = new ObjectId().toString()
const threadId = new ObjectId().toString()
const ts = new Date().toJSON()
@@ -127,7 +126,7 @@ describe('RangeManager', function () {
],
comments: [
{
- id: commentId,
+ id: threadId,
op: { c: 'foo', p: 3, t: threadId },
},
],
@@ -142,6 +141,7 @@ describe('RangeManager', function () {
return describe('shouldUpdateRanges', function () {
beforeEach(function () {
+ const threadId = new ObjectId()
this.ranges = {
changes: [
{
@@ -155,8 +155,8 @@ describe('RangeManager', function () {
],
comments: [
{
- id: new ObjectId(),
- op: { c: 'foo', p: 3, t: new ObjectId() },
+ id: threadId,
+ op: { c: 'foo', p: 3, t: threadId },
},
],
}
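
// RangeManager.fixCommentIds itself is not shown in this patch; judging
// from the updated expectations here (comment ids now equal the thread id
// carried in op.t) and the new stub in DocManagerTests, it presumably does
// something along these lines (the argument shape is an assumption):
function fixCommentIds(doc) {
  for (const comment of doc?.ranges?.comments || []) {
    comment.id = comment.op.t
  }
}
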
diff --git a/services/document-updater/.gitignore b/services/document-updater/.gitignore
deleted file mode 100644
index 624e78f096..0000000000
--- a/services/document-updater/.gitignore
+++ /dev/null
@@ -1,52 +0,0 @@
-compileFolder
-
-Compiled source #
-###################
-*.com
-*.class
-*.dll
-*.exe
-*.o
-*.so
-
-# Packages #
-############
-# it's better to unpack these files and commit the raw source
-# git has its own built in compression methods
-*.7z
-*.dmg
-*.gz
-*.iso
-*.jar
-*.rar
-*.tar
-*.zip
-
-# Logs and databases #
-######################
-*.log
-*.sql
-*.sqlite
-
-# OS generated files #
-######################
-.DS_Store?
-ehthumbs.db
-Icon?
-Thumbs.db
-
-/node_modules/*
-
-
-
-forever/
-
-**.swp
-
-# Redis cluster
-**/appendonly.aof
-**/dump.rdb
-**/nodes.conf
-
-# managed by dev-environment$ bin/update_build_scripts
-.npmrc
diff --git a/services/document-updater/.nvmrc b/services/document-updater/.nvmrc
index 2a393af592..fc37597bcc 100644
--- a/services/document-updater/.nvmrc
+++ b/services/document-updater/.nvmrc
@@ -1 +1 @@
-20.18.0
+22.17.0
diff --git a/services/document-updater/Dockerfile b/services/document-updater/Dockerfile
index 220db82d99..720b619c41 100644
--- a/services/document-updater/Dockerfile
+++ b/services/document-updater/Dockerfile
@@ -2,7 +2,7 @@
# Instead run bin/update_build_scripts from
# https://github.com/overleaf/internal/
-FROM node:20.18.0 AS base
+FROM node:22.17.0 AS base
WORKDIR /overleaf/services/document-updater
diff --git a/services/document-updater/Makefile b/services/document-updater/Makefile
index 891f7c831f..46dfced5c9 100644
--- a/services/document-updater/Makefile
+++ b/services/document-updater/Makefile
@@ -32,12 +32,30 @@ HERE=$(shell pwd)
MONOREPO=$(shell cd ../../ && pwd)
# Run the linting commands in the scope of the monorepo.
# Eslint and prettier (plus some configs) are on the root.
-RUN_LINTING = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(HERE) node:20.18.0 npm run --silent
+RUN_LINTING = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(HERE) node:22.17.0 npm run --silent
RUN_LINTING_CI = docker run --rm --volume $(MONOREPO)/.editorconfig:/overleaf/.editorconfig --volume $(MONOREPO)/.eslintignore:/overleaf/.eslintignore --volume $(MONOREPO)/.eslintrc:/overleaf/.eslintrc --volume $(MONOREPO)/.prettierignore:/overleaf/.prettierignore --volume $(MONOREPO)/.prettierrc:/overleaf/.prettierrc --volume $(MONOREPO)/tsconfig.backend.json:/overleaf/tsconfig.backend.json ci/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER) npm run --silent
# Same but from the top of the monorepo
-RUN_LINTING_MONOREPO = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(MONOREPO) node:20.18.0 npm run --silent
+RUN_LINTING_MONOREPO = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(MONOREPO) node:22.17.0 npm run --silent
+
+SHELLCHECK_OPTS = \
+ --shell=bash \
+ --external-sources
+SHELLCHECK_COLOR := $(if $(CI),--color=never,--color)
+SHELLCHECK_FILES := { git ls-files "*.sh" -z; git grep -Plz "\A\#\!.*bash"; } | sort -zu
+
+shellcheck:
+ @$(SHELLCHECK_FILES) | xargs -0 -r docker run --rm -v $(HERE):/mnt -w /mnt \
+ koalaman/shellcheck:stable $(SHELLCHECK_OPTS) $(SHELLCHECK_COLOR)
+
+shellcheck_fix:
+ @$(SHELLCHECK_FILES) | while IFS= read -r -d '' file; do \
+ diff=$$(docker run --rm -v $(HERE):/mnt -w /mnt koalaman/shellcheck:stable $(SHELLCHECK_OPTS) --format=diff "$$file" 2>/dev/null); \
+ if [ -n "$$diff" ] && ! echo "$$diff" | patch -p1 >/dev/null 2>&1; then echo "\033[31m$$file\033[0m"; \
+ elif [ -n "$$diff" ]; then echo "$$file"; \
+ else echo "\033[2m$$file\033[0m"; fi \
+ done
format:
$(RUN_LINTING) format
@@ -63,7 +81,7 @@ typecheck:
typecheck_ci:
$(RUN_LINTING_CI) types:check
-test: format lint typecheck test_unit test_acceptance
+test: format lint typecheck shellcheck test_unit test_acceptance
test_unit:
ifneq (,$(wildcard test/unit))
@@ -98,13 +116,6 @@ test_acceptance_clean:
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) down -v -t 0
test_acceptance_pre_run:
- $(DOCKER_COMPOSE_TEST_ACCEPTANCE) up -d mongo
- $(DOCKER_COMPOSE_TEST_ACCEPTANCE) exec -T mongo sh -c ' \
- while ! mongosh --eval "db.version()" > /dev/null; do \
- echo "Waiting for Mongo..."; \
- sleep 1; \
- done; \
- mongosh --eval "rs.initiate({ _id: \"overleaf\", members: [ { _id: 0, host: \"mongo:27017\" } ] })"'
ifneq (,$(wildcard test/acceptance/js/scripts/pre-run))
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) run --rm test_acceptance test/acceptance/js/scripts/pre-run
endif
@@ -137,6 +148,7 @@ publish:
lint lint_fix \
build_types typecheck \
lint_ci format_ci typecheck_ci \
+ shellcheck shellcheck_fix \
test test_clean test_unit test_unit_clean \
test_acceptance test_acceptance_debug test_acceptance_pre_run \
test_acceptance_run test_acceptance_run_debug test_acceptance_clean \
diff --git a/services/document-updater/app.js b/services/document-updater/app.js
index 9466c188ac..65c9895377 100644
--- a/services/document-updater/app.js
+++ b/services/document-updater/app.js
@@ -135,6 +135,10 @@ app.use((req, res, next) => {
})
app.get('/project/:project_id/doc/:doc_id', HttpController.getDoc)
+app.get(
+ '/project/:project_id/doc/:doc_id/comment/:comment_id',
+ HttpController.getComment
+)
app.get('/project/:project_id/doc/:doc_id/peek', HttpController.peekDoc)
// temporarily keep the GET method for backwards compatibility
app.get('/project/:project_id/doc', HttpController.getProjectDocsAndFlushIfOld)
@@ -143,6 +147,10 @@ app.post(
'/project/:project_id/get_and_flush_if_old',
HttpController.getProjectDocsAndFlushIfOld
)
+app.get(
+ '/project/:project_id/last_updated_at',
+ HttpController.getProjectLastUpdatedAt
+)
app.post('/project/:project_id/clearState', HttpController.clearProjectState)
app.post('/project/:project_id/doc/:doc_id', HttpController.setDoc)
app.post('/project/:project_id/doc/:doc_id/append', HttpController.appendToDoc)
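
// Sketch of calling the comment route registered above (not part of the
// patch; the base URL and the 404 mapping are assumptions, and the
// HttpController.getComment handler is added outside this hunk):
async function fetchComment(projectId, docId, commentId) {
  const url = `http://document-updater/project/${projectId}/doc/${docId}/comment/${commentId}`
  const res = await fetch(url)
  if (res.status === 404) {
    return null // comment not present in the doc's ranges
  }
  return await res.json()
}
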
diff --git a/services/document-updater/app/js/DiffCodec.js b/services/document-updater/app/js/DiffCodec.js
index 245903ca13..17da409386 100644
--- a/services/document-updater/app/js/DiffCodec.js
+++ b/services/document-updater/app/js/DiffCodec.js
@@ -1,4 +1,6 @@
+const OError = require('@overleaf/o-error')
const DMP = require('diff-match-patch')
+const { TextOperation } = require('overleaf-editor-core')
const dmp = new DMP()
// Do not attempt to produce a diff for more than 100ms
@@ -16,8 +18,7 @@ module.exports = {
const ops = []
let position = 0
for (const diff of diffs) {
- const type = diff[0]
- const content = diff[1]
+ const [type, content] = diff
if (type === this.ADDED) {
ops.push({
i: content,
@@ -37,4 +38,63 @@ module.exports = {
}
return ops
},
+
+ /**
+ * @param {import("overleaf-editor-core").StringFileData} file
+ * @param {string} after
+ * @return {TextOperation}
+ */
+ diffAsHistoryOTEditOperation(file, after) {
+ const beforeWithoutTrackedDeletes = file.getContent({
+ filterTrackedDeletes: true,
+ })
+ const diffs = dmp.diff_main(beforeWithoutTrackedDeletes, after)
+ dmp.diff_cleanupSemantic(diffs)
+
+ const trackedChanges = file.trackedChanges.asSorted()
+ let nextTc = trackedChanges.shift()
+
+ const op = new TextOperation()
+ for (const diff of diffs) {
+ let [type, content] = diff
+ if (type === this.ADDED) {
+ op.insert(content)
+ } else if (type === this.REMOVED || type === this.UNCHANGED) {
+ while (op.baseLength + content.length > nextTc?.range.start) {
+ if (nextTc.tracking.type === 'delete') {
+ const untilRange = nextTc.range.start - op.baseLength
+ if (type === this.REMOVED) {
+ op.remove(untilRange)
+ } else if (type === this.UNCHANGED) {
+ op.retain(untilRange)
+ }
+ op.retain(nextTc.range.end - nextTc.range.start)
+ content = content.slice(untilRange)
+ }
+ nextTc = trackedChanges.shift()
+ }
+ if (type === this.REMOVED) {
+ op.remove(content.length)
+ } else if (type === this.UNCHANGED) {
+ op.retain(content.length)
+ }
+ } else {
+ throw new Error('Unknown type')
+ }
+ }
+ while (nextTc) {
+ if (
+ nextTc.tracking.type !== 'delete' ||
+ nextTc.range.start !== op.baseLength
+ ) {
+ throw new OError(
+ 'StringFileData.trackedChanges out of sync: unexpected range after end of diff',
+ { nextTc, baseLength: op.baseLength }
+ )
+ }
+ op.retain(nextTc.range.end - nextTc.range.start)
+ nextTc = trackedChanges.shift()
+ }
+ return op
+ },
}
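
// Worked example for diffAsHistoryOTEditOperation (illustrative values, not
// part of the patch): suppose the stored file is "abXYcd" where "XY" is a
// tracked delete. getContent({ filterTrackedDeletes: true }) returns
// "abcd", and diffing that against "abQcd" gives roughly:
const DMP = require('diff-match-patch')
const dmp = new DMP()
const diffs = dmp.diff_main('abcd', 'abQcd')
dmp.diff_cleanupSemantic(diffs)
// diffs ≈ [[0, 'ab'], [1, 'Q'], [0, 'cd']]
//
// The loop then rebuilds the operation against the full-length text by
// retaining across the tracked-delete range: retain(2), insert('Q'),
// retain(2) for "XY", retain(2), so op.baseLength === 6 matches the stored
// file and the trailing while loop has nothing left to consume.
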
diff --git a/services/document-updater/app/js/DocumentManager.js b/services/document-updater/app/js/DocumentManager.js
index 1b5598aab8..3fb3d10a6e 100644
--- a/services/document-updater/app/js/DocumentManager.js
+++ b/services/document-updater/app/js/DocumentManager.js
@@ -11,10 +11,16 @@ const RangesManager = require('./RangesManager')
const { extractOriginOrSource } = require('./Utils')
const { getTotalSizeOfLines } = require('./Limits')
const Settings = require('@overleaf/settings')
+const { StringFileData } = require('overleaf-editor-core')
const MAX_UNFLUSHED_AGE = 300 * 1000 // 5 mins, document should be flushed to mongo this time after a change
const DocumentManager = {
+ /**
+ * @param {string} projectId
+ * @param {string} docId
+ * @return {Promise<{lines: (string[] | StringFileRawData), version: number, ranges: Ranges, resolvedCommentIds: any[], pathname: string, projectHistoryId: string, unflushedTime: any, alreadyLoaded: boolean, historyRangesSupport: boolean, type: OTType}>}
+ */
async getDoc(projectId, docId) {
const {
lines,
@@ -75,6 +81,7 @@ const DocumentManager = {
unflushedTime: null,
alreadyLoaded: false,
historyRangesSupport,
+ type: Array.isArray(lines) ? 'sharejs-text-ot' : 'history-ot',
}
} else {
return {
@@ -87,16 +94,25 @@ const DocumentManager = {
unflushedTime,
alreadyLoaded: true,
historyRangesSupport,
+ type: Array.isArray(lines) ? 'sharejs-text-ot' : 'history-ot',
}
}
},
async getDocAndRecentOps(projectId, docId, fromVersion) {
- const { lines, version, ranges, pathname, projectHistoryId } =
+ const { lines, version, ranges, pathname, projectHistoryId, type } =
await DocumentManager.getDoc(projectId, docId)
if (fromVersion === -1) {
- return { lines, version, ops: [], ranges, pathname, projectHistoryId }
+ return {
+ lines,
+ version,
+ ops: [],
+ ranges,
+ pathname,
+ projectHistoryId,
+ type,
+ }
} else {
const ops = await RedisManager.promises.getPreviousDocOps(
docId,
@@ -110,15 +126,21 @@ const DocumentManager = {
ranges,
pathname,
projectHistoryId,
+ type,
}
}
},
async appendToDoc(projectId, docId, linesToAppend, originOrSource, userId) {
- const { lines: currentLines } = await DocumentManager.getDoc(
+ let { lines: currentLines, type } = await DocumentManager.getDoc(
projectId,
docId
)
+ if (type === 'history-ot') {
+ const file = StringFileData.fromRaw(currentLines)
+ // TODO(24596): tc support for history-ot
+ currentLines = file.getLines()
+ }
const currentLineSize = getTotalSizeOfLines(currentLines)
const addedSize = getTotalSizeOfLines(linesToAppend)
const newlineSize = '\n'.length
@@ -153,30 +175,41 @@ const DocumentManager = {
throw new Error('No lines were provided to setDoc')
}
+ // Circular dependencies. Import at runtime.
+ const HistoryOTUpdateManager = require('./HistoryOTUpdateManager')
const UpdateManager = require('./UpdateManager')
+
const {
lines: oldLines,
version,
alreadyLoaded,
+ type,
} = await DocumentManager.getDoc(projectId, docId)
- if (oldLines != null && oldLines.length > 0 && oldLines[0].text != null) {
- logger.debug(
- { docId, projectId, oldLines, newLines },
- 'document is JSON so not updating'
- )
- return
- }
-
logger.debug(
{ docId, projectId, oldLines, newLines },
'setting a document via http'
)
- const op = DiffCodec.diffAsShareJsOp(oldLines, newLines)
- if (undoing) {
- for (const o of op || []) {
- o.u = true
- } // Turn on undo flag for each op for track changes
+
+ let op
+ if (type === 'history-ot') {
+ const file = StringFileData.fromRaw(oldLines)
+ const operation = DiffCodec.diffAsHistoryOTEditOperation(
+ file,
+ newLines.join('\n')
+ )
+ if (operation.isNoop()) {
+ op = []
+ } else {
+ op = [operation.toJSON()]
+ }
+ } else {
+ op = DiffCodec.diffAsShareJsOp(oldLines, newLines)
+ if (undoing) {
+ for (const o of op || []) {
+ o.u = true
+ } // Turn on undo flag for each op for track changes
+ }
}
const { origin, source } = extractOriginOrSource(originOrSource)
@@ -211,7 +244,11 @@ const DocumentManager = {
// this update, otherwise the doc would never be
// removed from redis.
if (op.length > 0) {
- await UpdateManager.promises.applyUpdate(projectId, docId, update)
+ if (type === 'history-ot') {
+ await HistoryOTUpdateManager.applyUpdate(projectId, docId, update)
+ } else {
+ await UpdateManager.promises.applyUpdate(projectId, docId, update)
+ }
}
// If the document was loaded already, then someone has it open
@@ -232,7 +269,7 @@ const DocumentManager = {
},
async flushDocIfLoaded(projectId, docId) {
- const {
+ let {
lines,
version,
ranges,
@@ -253,6 +290,11 @@ const DocumentManager = {
logger.debug({ projectId, docId, version }, 'flushing doc')
Metrics.inc('flush-doc-if-loaded', 1, { status: 'modified' })
+ if (!Array.isArray(lines)) {
+ const file = StringFileData.fromRaw(lines)
+ // TODO(24596): tc support for history-ot
+ lines = file.getLines()
+ }
const result = await PersistenceManager.promises.setDoc(
projectId,
docId,
@@ -302,6 +344,7 @@ const DocumentManager = {
throw new Errors.NotFoundError(`document not found: ${docId}`)
}
+ // TODO(24596): tc support for history-ot
const newRanges = RangesManager.acceptChanges(
projectId,
docId,
@@ -367,6 +410,22 @@ const DocumentManager = {
}
},
+ async getComment(projectId, docId, commentId) {
+ // TODO(24596): tc support for history-ot
+ const { ranges } = await DocumentManager.getDoc(projectId, docId)
+
+ const comment = ranges?.comments?.find(comment => comment.id === commentId)
+
+ if (!comment) {
+ throw new Errors.NotFoundError({
+ message: 'comment not found',
+ info: { commentId },
+ })
+ }
+
+ return { comment }
+ },
+
async deleteComment(projectId, docId, commentId, userId) {
const { lines, version, ranges, pathname, historyRangesSupport } =
await DocumentManager.getDoc(projectId, docId)
@@ -374,6 +433,7 @@ const DocumentManager = {
throw new Errors.NotFoundError(`document not found: ${docId}`)
}
+ // TODO(24596): tc support for history-ot
const newRanges = RangesManager.deleteComment(commentId, ranges)
await RedisManager.promises.updateDocument(
@@ -413,7 +473,7 @@ const DocumentManager = {
},
async getDocAndFlushIfOld(projectId, docId) {
- const { lines, version, unflushedTime, alreadyLoaded } =
+ let { lines, version, unflushedTime, alreadyLoaded } =
await DocumentManager.getDoc(projectId, docId)
// if doc was already loaded see if it needs to be flushed
@@ -425,6 +485,12 @@ const DocumentManager = {
await DocumentManager.flushDocIfLoaded(projectId, docId)
}
+ if (!Array.isArray(lines)) {
+ const file = StringFileData.fromRaw(lines)
+ // TODO(24596): tc support for history-ot
+ lines = file.getLines()
+ }
+
return { lines, version }
},
@@ -500,6 +566,16 @@ const DocumentManager = {
)
},
+ async getCommentWithLock(projectId, docId, commentId) {
+ const UpdateManager = require('./UpdateManager')
+ return await UpdateManager.promises.lockUpdatesAndDo(
+ DocumentManager.getComment,
+ projectId,
+ docId,
+ commentId
+ )
+ },
+
async getDocAndRecentOpsWithLock(projectId, docId, fromVersion) {
const UpdateManager = require('./UpdateManager')
return await UpdateManager.promises.lockUpdatesAndDo(
@@ -667,6 +743,7 @@ module.exports = {
'ranges',
'pathname',
'projectHistoryId',
+ 'type',
],
getDocAndRecentOpsWithLock: [
'lines',
@@ -675,7 +752,9 @@ module.exports = {
'ranges',
'pathname',
'projectHistoryId',
+ 'type',
],
+ getCommentWithLock: ['comment'],
},
}),
promises: DocumentManager,
diff --git a/services/document-updater/app/js/Errors.js b/services/document-updater/app/js/Errors.js
index a43f69ad35..ac1f5875fa 100644
--- a/services/document-updater/app/js/Errors.js
+++ b/services/document-updater/app/js/Errors.js
@@ -5,6 +5,15 @@ class OpRangeNotAvailableError extends OError {}
class ProjectStateChangedError extends OError {}
class DeleteMismatchError extends OError {}
class FileTooLargeError extends OError {}
+class OTTypeMismatchError extends OError {
+ /**
+ * @param {OTType} got
+ * @param {OTType} want
+ */
+ constructor(got, want) {
+ super('ot type mismatch', { got, want })
+ }
+}
module.exports = {
NotFoundError,
@@ -12,4 +21,5 @@ module.exports = {
ProjectStateChangedError,
DeleteMismatchError,
FileTooLargeError,
+ OTTypeMismatchError,
}
diff --git a/services/document-updater/app/js/HistoryManager.js b/services/document-updater/app/js/HistoryManager.js
index 3a91b29cb8..d9a8459525 100644
--- a/services/document-updater/app/js/HistoryManager.js
+++ b/services/document-updater/app/js/HistoryManager.js
@@ -62,6 +62,7 @@ const HistoryManager = {
// record updates for project history
if (
HistoryManager.shouldFlushHistoryOps(
+ projectId,
projectOpsLength,
ops.length,
HistoryManager.FLUSH_PROJECT_EVERY_N_OPS
@@ -77,7 +78,8 @@ const HistoryManager = {
}
},
- shouldFlushHistoryOps(length, opsLength, threshold) {
+ shouldFlushHistoryOps(projectId, length, opsLength, threshold) {
+ if (Settings.shortHistoryQueues.includes(projectId)) return true
if (!length) {
return false
} // don't flush unless we know the length
@@ -106,10 +108,12 @@ const HistoryManager = {
projectHistoryId,
docs,
files,
+ opts,
function (error) {
if (error) {
return callback(error)
}
+ if (opts.resyncProjectStructureOnly) return callback()
const DocumentManager = require('./DocumentManager')
const resyncDoc = (doc, cb) => {
DocumentManager.resyncDocContentsWithLock(
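The new `resyncProjectStructureOnly` flag short-circuits a resync once the structure update has been queued, so no per-doc content resync is triggered. A minimal sketch (not part of the patch) of how the flag travels from the HTTP body into that early return; the request shape is abbreviated and the ids are invented:

```js
// Sketch: resyncProjectStructureOnly flowing through the resync path (ids invented).
const reqBody = {
  projectHistoryId: 123,
  docs: [{ doc: 'doc-a', path: '/main.tex' }],
  files: [],
  resyncProjectStructureOnly: true,
}

// HttpController.resyncProjectHistory copies the flag into opts
const opts = {}
if (reqBody.resyncProjectStructureOnly) {
  opts.resyncProjectStructureOnly = reqBody.resyncProjectStructureOnly
}

// HistoryManager.resyncProjectHistory queues the structure update and then,
// when the flag is set, returns before resyncing any doc contents:
//   if (opts.resyncProjectStructureOnly) return callback()
console.log(opts) // { resyncProjectStructureOnly: true }
```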
diff --git a/services/document-updater/app/js/HistoryOTUpdateManager.js b/services/document-updater/app/js/HistoryOTUpdateManager.js
new file mode 100644
index 0000000000..5a8b92099e
--- /dev/null
+++ b/services/document-updater/app/js/HistoryOTUpdateManager.js
@@ -0,0 +1,158 @@
+// @ts-check
+
+const Profiler = require('./Profiler')
+const DocumentManager = require('./DocumentManager')
+const Errors = require('./Errors')
+const RedisManager = require('./RedisManager')
+const {
+ EditOperationBuilder,
+ StringFileData,
+ EditOperationTransformer,
+} = require('overleaf-editor-core')
+const Metrics = require('./Metrics')
+const ProjectHistoryRedisManager = require('./ProjectHistoryRedisManager')
+const HistoryManager = require('./HistoryManager')
+const RealTimeRedisManager = require('./RealTimeRedisManager')
+
+/**
+ * @typedef {import("./types").Update} Update
+ * @typedef {import("./types").HistoryOTEditOperationUpdate} HistoryOTEditOperationUpdate
+ */
+
+/**
+ * @param {Update} update
+ * @return {update is HistoryOTEditOperationUpdate}
+ */
+function isHistoryOTEditOperationUpdate(update) {
+ return (
+ update &&
+ 'doc' in update &&
+ 'op' in update &&
+ 'v' in update &&
+ Array.isArray(update.op) &&
+ EditOperationBuilder.isValid(update.op[0])
+ )
+}
+
+/**
+ * Try to apply an update to the given document
+ *
+ * @param {string} projectId
+ * @param {string} docId
+ * @param {HistoryOTEditOperationUpdate} update
+ * @param {Profiler} profiler
+ */
+async function tryApplyUpdate(projectId, docId, update, profiler) {
+ let { lines, version, pathname, type } =
+ await DocumentManager.promises.getDoc(projectId, docId)
+ profiler.log('getDoc')
+
+ if (lines == null || version == null) {
+ throw new Errors.NotFoundError(`document not found: ${docId}`)
+ }
+ if (type !== 'history-ot') {
+ throw new Errors.OTTypeMismatchError(type, 'history-ot')
+ }
+
+ let op = EditOperationBuilder.fromJSON(update.op[0])
+ if (version !== update.v) {
+ const transformUpdates = await RedisManager.promises.getPreviousDocOps(
+ docId,
+ update.v,
+ version
+ )
+ for (const transformUpdate of transformUpdates) {
+ if (!isHistoryOTEditOperationUpdate(transformUpdate)) {
+ throw new Errors.OTTypeMismatchError('sharejs-text-ot', 'history-ot')
+ }
+
+ if (
+ transformUpdate.meta.source &&
+ update.dupIfSource?.includes(transformUpdate.meta.source)
+ ) {
+ update.dup = true
+ break
+ }
+ const other = EditOperationBuilder.fromJSON(transformUpdate.op[0])
+ op = EditOperationTransformer.transform(op, other)[0]
+ }
+ update.op = [op.toJSON()]
+ }
+
+ if (!update.dup) {
+ const file = StringFileData.fromRaw(lines)
+ file.edit(op)
+ version += 1
+ update.meta.ts = Date.now()
+ await RedisManager.promises.updateDocument(
+ projectId,
+ docId,
+ file.toRaw(),
+ version,
+ [update],
+ {},
+ update.meta
+ )
+
+ Metrics.inc('history-queue', 1, { status: 'project-history' })
+ try {
+ const projectOpsLength =
+ await ProjectHistoryRedisManager.promises.queueOps(projectId, [
+ JSON.stringify({
+ ...update,
+ meta: {
+ ...update.meta,
+ pathname,
+ },
+ }),
+ ])
+ HistoryManager.recordAndFlushHistoryOps(
+ projectId,
+ [update],
+ projectOpsLength
+ )
+ profiler.log('recordAndFlushHistoryOps')
+ } catch (err) {
+ // The full project history can re-sync a project in case
+ // updates went missing.
+ // Just record the error here and acknowledge the write-op.
+ Metrics.inc('history-queue-error')
+ }
+ }
+ RealTimeRedisManager.sendData({
+ project_id: projectId,
+ doc_id: docId,
+ op: update,
+ })
+}
+
+/**
+ * Apply an update to the given document
+ *
+ * @param {string} projectId
+ * @param {string} docId
+ * @param {HistoryOTEditOperationUpdate} update
+ */
+async function applyUpdate(projectId, docId, update) {
+ const profiler = new Profiler('applyUpdate', {
+ project_id: projectId,
+ doc_id: docId,
+ type: 'history-ot',
+ })
+
+ try {
+ await tryApplyUpdate(projectId, docId, update, profiler)
+ } catch (error) {
+ RealTimeRedisManager.sendData({
+ project_id: projectId,
+ doc_id: docId,
+ error: error instanceof Error ? error.message : error,
+ })
+ profiler.log('sendData')
+ throw error
+ } finally {
+ profiler.end()
+ }
+}
+
+module.exports = { isHistoryOTEditOperationUpdate, applyUpdate }
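Routing between the two update managers hinges on the shape of `update.op[0]`: sharejs-text-ot updates carry positional `i`/`d` ops, while history-ot updates carry a `textOperation` array. A minimal sketch of that distinction, not part of the patch; `looksLikeHistoryOT` is a hypothetical stand-in for the real `EditOperationBuilder.isValid` check used by `isHistoryOTEditOperationUpdate`:

```js
// Sketch only: the two update shapes that UpdateManager now routes to different managers.
// Shapes are taken from the acceptance tests below.
const sharejsUpdate = {
  doc: 'doc-a',
  v: 42,
  op: [{ i: 'one and a half\n', p: 4 }], // positional insert
}

const historyOTUpdate = {
  doc: 'doc-a',
  v: 42,
  meta: { source: 'publicId' },
  op: [{ textOperation: [4, 'one and a half\n', 9] }], // retain 4, insert, retain 9
}

// Hypothetical approximation of EditOperationBuilder.isValid(update.op[0]).
function looksLikeHistoryOT(update) {
  return Array.isArray(update.op) && update.op[0] != null && 'textOperation' in update.op[0]
}

console.log(looksLikeHistoryOT(sharejsUpdate)) // false -> UpdateManager.applyUpdate
console.log(looksLikeHistoryOT(historyOTUpdate)) // true  -> HistoryOTUpdateManager.applyUpdate
```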
diff --git a/services/document-updater/app/js/HttpController.js b/services/document-updater/app/js/HttpController.js
index 2d6e81eebb..0a6ae3b2b4 100644
--- a/services/document-updater/app/js/HttpController.js
+++ b/services/document-updater/app/js/HttpController.js
@@ -9,6 +9,7 @@ const Metrics = require('./Metrics')
const DeleteQueueManager = require('./DeleteQueueManager')
const { getTotalSizeOfLines } = require('./Limits')
const async = require('async')
+const { StringFileData } = require('overleaf-editor-core')
function getDoc(req, res, next) {
let fromVersion
@@ -27,7 +28,7 @@ function getDoc(req, res, next) {
projectId,
docId,
fromVersion,
- (error, lines, version, ops, ranges, pathname) => {
+ (error, lines, version, ops, ranges, pathname, _projectHistoryId, type) => {
timer.done()
if (error) {
return next(error)
@@ -36,6 +37,11 @@ function getDoc(req, res, next) {
if (lines == null || version == null) {
return next(new Errors.NotFoundError('document not found'))
}
+ if (!Array.isArray(lines) && req.query.historyOTSupport !== 'true') {
+ const file = StringFileData.fromRaw(lines)
+ // TODO(24596): tc support for history-ot
+ lines = file.getLines()
+ }
res.json({
id: docId,
lines,
@@ -44,11 +50,35 @@ function getDoc(req, res, next) {
ranges,
pathname,
ttlInS: RedisManager.DOC_OPS_TTL,
+ type,
})
}
)
}
+function getComment(req, res, next) {
+ const docId = req.params.doc_id
+ const projectId = req.params.project_id
+ const commentId = req.params.comment_id
+
+ logger.debug({ projectId, docId, commentId }, 'getting comment via http')
+
+ DocumentManager.getCommentWithLock(
+ projectId,
+ docId,
+ commentId,
+ (error, comment) => {
+ if (error) {
+ return next(error)
+ }
+ if (comment == null) {
+ return next(new Errors.NotFoundError('comment not found'))
+ }
+ res.json(comment)
+ }
+ )
+}
+
// return the doc from redis if present, but don't load it from mongo
function peekDoc(req, res, next) {
const docId = req.params.doc_id
@@ -61,6 +91,11 @@ function peekDoc(req, res, next) {
if (lines == null || version == null) {
return next(new Errors.NotFoundError('document not found'))
}
+ if (!Array.isArray(lines) && req.query.historyOTSupport !== 'true') {
+ const file = StringFileData.fromRaw(lines)
+ // TODO(24596): tc support for history-ot
+ lines = file.getLines()
+ }
res.json({ id: docId, lines, version })
})
}
@@ -106,6 +141,22 @@ function getProjectDocsAndFlushIfOld(req, res, next) {
)
}
+function getProjectLastUpdatedAt(req, res, next) {
+ const projectId = req.params.project_id
+ ProjectManager.getProjectDocsTimestamps(projectId, (err, timestamps) => {
+ if (err) return next(err)
+
+ // Filter out nulls. This can happen when
+ // - docs get flushed between the listing and getting the individual docs ts
+ // - a doc flush failed half way (doc keys removed, project tracking not updated)
+ timestamps = timestamps.filter(ts => !!ts)
+
+ timestamps = timestamps.map(ts => parseInt(ts, 10))
+ timestamps.sort((a, b) => (a > b ? 1 : -1))
+ res.json({ lastUpdatedAt: timestamps.pop() })
+ })
+}
+
function clearProjectState(req, res, next) {
const projectId = req.params.project_id
const timer = new Metrics.Timer('http.clearProjectState')
@@ -410,7 +461,13 @@ function updateProject(req, res, next) {
function resyncProjectHistory(req, res, next) {
const projectId = req.params.project_id
- const { projectHistoryId, docs, files, historyRangesMigration } = req.body
+ const {
+ projectHistoryId,
+ docs,
+ files,
+ historyRangesMigration,
+ resyncProjectStructureOnly,
+ } = req.body
logger.debug(
{ projectId, docs, files },
@@ -421,6 +478,9 @@ function resyncProjectHistory(req, res, next) {
if (historyRangesMigration) {
opts.historyRangesMigration = historyRangesMigration
}
+ if (resyncProjectStructureOnly) {
+ opts.resyncProjectStructureOnly = resyncProjectStructureOnly
+ }
HistoryManager.resyncProjectHistory(
projectId,
@@ -489,6 +549,7 @@ module.exports = {
getDoc,
peekDoc,
getProjectDocsAndFlushIfOld,
+ getProjectLastUpdatedAt,
clearProjectState,
appendToDoc,
setDoc,
@@ -506,4 +567,5 @@ module.exports = {
flushQueuedProjects,
blockProject,
unblockProject,
+ getComment,
}
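`getProjectLastUpdatedAt` reduces the per-doc timestamps to a single value: nulls are dropped, the strings are parsed, and the maximum is taken by sorting ascending and popping the last element. A standalone sketch of that reduction with invented timestamp values:

```js
// Sketch of the timestamp reduction in getProjectLastUpdatedAt (values invented).
const rawTimestamps = ['1714060800000', null, '1714064400000'] // null: doc flushed mid-listing

let timestamps = rawTimestamps.filter(ts => !!ts) // drop nulls
timestamps = timestamps.map(ts => parseInt(ts, 10))
timestamps.sort((a, b) => (a > b ? 1 : -1)) // ascending

const lastUpdatedAt = timestamps.pop() // the maximum, or undefined when nothing is loaded
console.log({ lastUpdatedAt }) // { lastUpdatedAt: 1714064400000 }
// With no loaded docs, res.json({ lastUpdatedAt: undefined }) serialises to {},
// which is what the acceptance tests below expect for an unknown project.
```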
diff --git a/services/document-updater/app/js/Limits.js b/services/document-updater/app/js/Limits.js
index 268ccd3f9b..cbd9293042 100644
--- a/services/document-updater/app/js/Limits.js
+++ b/services/document-updater/app/js/Limits.js
@@ -28,4 +28,19 @@ module.exports = {
// since we didn't hit the limit in the loop, the document is within the allowed length
return false
},
+
+ /**
+ * @param {StringFileRawData} raw
+ * @param {number} maxDocLength
+ */
+ stringFileDataContentIsTooLarge(raw, maxDocLength) {
+ let n = raw.content.length
+ if (n <= maxDocLength) return false // definitely under the limit, no need to calculate the total size
+ for (const tc of raw.trackedChanges ?? []) {
+ if (tc.tracking.type !== 'delete') continue
+ n -= tc.range.length
+ if (n <= maxDocLength) return false // under the limit now, no need to calculate the exact size
+ }
+ return true
+ },
}
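`stringFileDataContentIsTooLarge` sizes a history-ot doc from its raw form, where `content` still includes tracked-deleted text, and only subtracts tracked-delete ranges until the running total drops under the limit. A worked sketch with invented numbers; the `range`/`tracking` shapes are assumed from the raw tracked-change format, and the require path assumes the snippet runs from services/document-updater:

```js
// Worked sketch for stringFileDataContentIsTooLarge (numbers invented).
const { stringFileDataContentIsTooLarge } = require('./app/js/Limits')

const raw = {
  content: 'hello WORLD world', // 17 chars including a tracked-deleted span
  trackedChanges: [
    // tracked delete covering 'WORLD ' (6 chars): excluded from the effective size
    { range: { pos: 6, length: 6 }, tracking: { type: 'delete' } },
    // tracked insert: ignored by the size check
    { range: { pos: 12, length: 5 }, tracking: { type: 'insert' } },
  ],
}

// 17 - 6 = 11 visible characters
console.log(stringFileDataContentIsTooLarge(raw, 10)) // true  (11 > 10)
console.log(stringFileDataContentIsTooLarge(raw, 11)) // false (11 <= 11)
```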
diff --git a/services/document-updater/app/js/PersistenceManager.js b/services/document-updater/app/js/PersistenceManager.js
index b08994ae41..6e832f9aa7 100644
--- a/services/document-updater/app/js/PersistenceManager.js
+++ b/services/document-updater/app/js/PersistenceManager.js
@@ -95,6 +95,13 @@ function getDoc(projectId, docId, options = {}, _callback) {
status: body.pathname === '' ? 'zero-length' : 'undefined',
})
}
+
+ if (body.otMigrationStage > 0) {
+ // Use history-ot
+ body.lines = { content: body.lines.join('\n') }
+ body.ranges = {}
+ }
+
callback(
null,
body.lines,
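Once a doc has `otMigrationStage > 0`, the line array from the web API is converted to the history-ot raw string-file form before it reaches Redis, and `Array.isArray(lines)` becomes the type discriminator used elsewhere in the patch. A short sketch of that conversion (not part of the patch):

```js
// Sketch: how a migrated doc changes shape in PersistenceManager.getDoc.
const body = {
  lines: ['one', 'two', 'three'],
  ranges: { comments: [], changes: [] },
  otMigrationStage: 1,
}

if (body.otMigrationStage > 0) {
  // history-ot stores a single StringFileRawData object instead of an array of lines
  body.lines = { content: body.lines.join('\n') }
  body.ranges = {}
}

console.log(body.lines) // { content: 'one\ntwo\nthree' }
console.log(Array.isArray(body.lines)) // false -> DocumentManager reports type 'history-ot'
```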
diff --git a/services/document-updater/app/js/Profiler.js b/services/document-updater/app/js/Profiler.js
index 8daac4ca41..aac8a9706e 100644
--- a/services/document-updater/app/js/Profiler.js
+++ b/services/document-updater/app/js/Profiler.js
@@ -1,68 +1,52 @@
-/* eslint-disable
- no-unused-vars,
-*/
-// TODO: This file was created by bulk-decaffeinate.
-// Fix any style issues and re-enable lint.
-/*
- * decaffeinate suggestions:
- * DS206: Consider reworking classes to avoid initClass
- * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
- */
-let Profiler
-const Settings = require('@overleaf/settings')
const logger = require('@overleaf/logger')
-const deltaMs = function (ta, tb) {
+function deltaMs(ta, tb) {
const nanoSeconds = (ta[0] - tb[0]) * 1e9 + (ta[1] - tb[1])
const milliSeconds = Math.floor(nanoSeconds * 1e-6)
return milliSeconds
}
-module.exports = Profiler = (function () {
- Profiler = class Profiler {
- static initClass() {
- this.prototype.LOG_CUTOFF_TIME = 15 * 1000
- this.prototype.LOG_SYNC_CUTOFF_TIME = 1000
- }
+class Profiler {
+ LOG_CUTOFF_TIME = 15 * 1000
+ LOG_SYNC_CUTOFF_TIME = 1000
- constructor(name, args) {
- this.name = name
- this.args = args
- this.t0 = this.t = process.hrtime()
- this.start = new Date()
- this.updateTimes = []
- this.totalSyncTime = 0
- }
-
- log(label, options = {}) {
- const t1 = process.hrtime()
- const dtMilliSec = deltaMs(t1, this.t)
- this.t = t1
- this.totalSyncTime += options.sync ? dtMilliSec : 0
- this.updateTimes.push([label, dtMilliSec]) // timings in ms
- return this // make it chainable
- }
-
- end(message) {
- const totalTime = deltaMs(this.t, this.t0)
- const exceedsCutoff = totalTime > this.LOG_CUTOFF_TIME
- const exceedsSyncCutoff = this.totalSyncTime > this.LOG_SYNC_CUTOFF_TIME
- if (exceedsCutoff || exceedsSyncCutoff) {
- // log anything greater than cutoffs
- const args = {}
- for (const k in this.args) {
- const v = this.args[k]
- args[k] = v
- }
- args.updateTimes = this.updateTimes
- args.start = this.start
- args.end = new Date()
- args.status = { exceedsCutoff, exceedsSyncCutoff }
- logger.warn(args, this.name)
- }
- return totalTime
- }
+ constructor(name, args) {
+ this.name = name
+ this.args = args
+ this.t0 = this.t = process.hrtime()
+ this.start = new Date()
+ this.updateTimes = []
+ this.totalSyncTime = 0
}
- Profiler.initClass()
- return Profiler
-})()
+
+ log(label, options = {}) {
+ const t1 = process.hrtime()
+ const dtMilliSec = deltaMs(t1, this.t)
+ this.t = t1
+ this.totalSyncTime += options.sync ? dtMilliSec : 0
+ this.updateTimes.push([label, dtMilliSec]) // timings in ms
+ return this // make it chainable
+ }
+
+ end() {
+ const totalTime = deltaMs(this.t, this.t0)
+ const exceedsCutoff = totalTime > this.LOG_CUTOFF_TIME
+ const exceedsSyncCutoff = this.totalSyncTime > this.LOG_SYNC_CUTOFF_TIME
+ if (exceedsCutoff || exceedsSyncCutoff) {
+ // log anything greater than cutoffs
+ const args = {}
+ for (const k in this.args) {
+ const v = this.args[k]
+ args[k] = v
+ }
+ args.updateTimes = this.updateTimes
+ args.start = this.start
+ args.end = new Date()
+ args.status = { exceedsCutoff, exceedsSyncCutoff }
+ logger.warn(args, this.name)
+ }
+ return totalTime
+ }
+}
+
+module.exports = Profiler
diff --git a/services/document-updater/app/js/ProjectHistoryRedisManager.js b/services/document-updater/app/js/ProjectHistoryRedisManager.js
index 888948af09..78e9c2ea4c 100644
--- a/services/document-updater/app/js/ProjectHistoryRedisManager.js
+++ b/services/document-updater/app/js/ProjectHistoryRedisManager.js
@@ -8,13 +8,14 @@ const rclient = require('@overleaf/redis-wrapper').createClient(
)
const logger = require('@overleaf/logger')
const metrics = require('./Metrics')
-const { docIsTooLarge } = require('./Limits')
+const { docIsTooLarge, stringFileDataContentIsTooLarge } = require('./Limits')
const { addTrackedDeletesToContent, extractOriginOrSource } = require('./Utils')
const HistoryConversions = require('./HistoryConversions')
const OError = require('@overleaf/o-error')
/**
* @import { Ranges } from './types'
+ * @import { StringFileRawData } from 'overleaf-editor-core/lib/types'
*/
const ProjectHistoryRedisManager = {
@@ -152,7 +153,13 @@ const ProjectHistoryRedisManager = {
return await ProjectHistoryRedisManager.queueOps(projectId, jsonUpdate)
},
- async queueResyncProjectStructure(projectId, projectHistoryId, docs, files) {
+ async queueResyncProjectStructure(
+ projectId,
+ projectHistoryId,
+ docs,
+ files,
+ opts
+ ) {
logger.debug({ projectId, docs, files }, 'queue project structure resync')
const projectUpdate = {
resyncProjectStructure: { docs, files },
@@ -161,6 +168,9 @@ const ProjectHistoryRedisManager = {
ts: new Date(),
},
}
+ if (opts.resyncProjectStructureOnly) {
+ projectUpdate.resyncProjectStructureOnly = opts.resyncProjectStructureOnly
+ }
const jsonUpdate = JSON.stringify(projectUpdate)
return await ProjectHistoryRedisManager.queueOps(projectId, jsonUpdate)
},
@@ -171,7 +181,7 @@ const ProjectHistoryRedisManager = {
* @param {string} projectId
* @param {string} projectHistoryId
* @param {string} docId
- * @param {string[]} lines
+ * @param {string[] | StringFileRawData} lines
* @param {Ranges} ranges
* @param {string[]} resolvedCommentIds
* @param {number} version
@@ -195,13 +205,8 @@ const ProjectHistoryRedisManager = {
'queue doc content resync'
)
- let content = lines.join('\n')
- if (historyRangesSupport) {
- content = addTrackedDeletesToContent(content, ranges.changes ?? [])
- }
-
const projectUpdate = {
- resyncDocContent: { content, version },
+ resyncDocContent: { version },
projectHistoryId,
path: pathname,
doc: docId,
@@ -210,17 +215,38 @@ const ProjectHistoryRedisManager = {
},
}
- if (historyRangesSupport) {
- projectUpdate.resyncDocContent.ranges =
- HistoryConversions.toHistoryRanges(ranges)
- projectUpdate.resyncDocContent.resolvedCommentIds = resolvedCommentIds
+ let content = ''
+ if (Array.isArray(lines)) {
+ content = lines.join('\n')
+ if (historyRangesSupport) {
+ content = addTrackedDeletesToContent(content, ranges.changes ?? [])
+ projectUpdate.resyncDocContent.ranges =
+ HistoryConversions.toHistoryRanges(ranges)
+ projectUpdate.resyncDocContent.resolvedCommentIds = resolvedCommentIds
+ }
+ } else {
+ content = lines.content
+ projectUpdate.resyncDocContent.historyOTRanges = {
+ comments: lines.comments,
+ trackedChanges: lines.trackedChanges,
+ }
}
+ projectUpdate.resyncDocContent.content = content
const jsonUpdate = JSON.stringify(projectUpdate)
// Do an optimised size check on the docLines using the serialised
// project update length as an upper bound
const sizeBound = jsonUpdate.length
- if (docIsTooLarge(sizeBound, lines, Settings.max_doc_length)) {
+ if (Array.isArray(lines)) {
+ if (docIsTooLarge(sizeBound, lines, Settings.max_doc_length)) {
+ throw new OError(
+ 'blocking resync doc content insert into project history queue: doc is too large',
+ { projectId, docId, docSize: sizeBound }
+ )
+ }
+ } else if (
+ stringFileDataContentIsTooLarge(lines, Settings.max_doc_length)
+ ) {
throw new OError(
'blocking resync doc content insert into project history queue: doc is too large',
{ projectId, docId, docSize: sizeBound }
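The resync payload now branches on the doc's storage form: sharejs docs keep the joined-lines `content` plus converted ranges, while history-ot docs send `lines.content` as-is together with a `historyOTRanges` object. A sketch of the two resulting `resyncDocContent` shapes, with invented values and field names taken from the code above:

```js
// Sketch of the two resyncDocContent payload shapes queued for project-history.
const sharejsResync = {
  resyncDocContent: {
    version: 42,
    content: 'one\ntwo\nthree', // lines joined; tracked deletes re-added when ranges are supported
    ranges: {}, // HistoryConversions.toHistoryRanges(ranges), only with historyRangesSupport
    resolvedCommentIds: [],
  },
}

const historyOTResync = {
  resyncDocContent: {
    version: 42,
    content: 'one\ntwo\nthree', // StringFileRawData.content as stored in Redis
    historyOTRanges: {
      comments: [], // lines.comments
      trackedChanges: [], // lines.trackedChanges
    },
  },
}

console.log(sharejsResync.resyncDocContent, historyOTResync.resyncDocContent)
```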
diff --git a/services/document-updater/app/js/ProjectManager.js b/services/document-updater/app/js/ProjectManager.js
index 781ed0e168..cdd4c11482 100644
--- a/services/document-updater/app/js/ProjectManager.js
+++ b/services/document-updater/app/js/ProjectManager.js
@@ -317,6 +317,7 @@ function updateProjectWithLocks(
}
if (
HistoryManager.shouldFlushHistoryOps(
+ projectId,
projectOpsLength,
updates.length,
HistoryManager.FLUSH_PROJECT_EVERY_N_OPS
diff --git a/services/document-updater/app/js/RangesManager.js b/services/document-updater/app/js/RangesManager.js
index 9b8a50c526..c146afda60 100644
--- a/services/document-updater/app/js/RangesManager.js
+++ b/services/document-updater/app/js/RangesManager.js
@@ -80,13 +80,6 @@ const RangesManager = {
}
}
- sanityCheckTrackedChanges(
- projectId,
- docId,
- rangesTracker.changes,
- getDocLength(newDocLines)
- )
-
if (
rangesTracker.changes?.length > RangesManager.MAX_CHANGES ||
rangesTracker.comments?.length > RangesManager.MAX_COMMENTS
@@ -139,12 +132,6 @@ const RangesManager = {
logger.debug(`accepting ${changeIds.length} changes in ranges`)
const rangesTracker = new RangesTracker(changes, comments)
rangesTracker.removeChangeIds(changeIds)
- sanityCheckTrackedChanges(
- projectId,
- docId,
- rangesTracker.changes,
- getDocLength(lines)
- )
const newRanges = RangesManager._getRanges(rangesTracker)
return newRanges
},
@@ -352,6 +339,12 @@ function getHistoryOpForInsert(op, comments, changes) {
}
}
+ // If it's determined that the op is a tracked delete rejection, we have to
+ // calculate its proper history position. If multiple tracked deletes are
+ // found at the same position as the insert, the tracked deletes that come
+ // before the tracked delete that was actually rejected offset the history
+ // position.
+ let trackedDeleteRejectionOffset = 0
for (const change of changes) {
if (!isDelete(change.op)) {
// We're only interested in tracked deletes
@@ -362,14 +355,25 @@ function getHistoryOpForInsert(op, comments, changes) {
// Tracked delete is before the op. Move the op forward.
hpos += change.op.d.length
} else if (change.op.p === op.p) {
- // Tracked delete is at the same position as the op. The insert comes before
- // the tracked delete so it doesn't move.
+ // Tracked delete is at the same position as the op.
if (op.u && change.op.d.startsWith(op.i)) {
// We're undoing and the insert matches the start of the tracked
// delete. RangesManager treats this as a tracked delete rejection. We
// will note this in the op so that project-history can take the
// appropriate action.
trackedDeleteRejection = true
+
+ // The history must be updated to take into account all preceding
+ // tracked deletes at the same position
+ hpos += trackedDeleteRejectionOffset
+
+ // No need to continue. All subsequent tracked deletes are after the
+ // insert.
+ break
+ } else {
+ // This tracked delete does not match the insert. Note its length in
+ // case we find a tracked delete that matches later.
+ trackedDeleteRejectionOffset += change.op.d.length
}
} else {
// Tracked delete is after the insert. Tracked deletes are ordered, so
@@ -570,88 +574,4 @@ function getCroppedCommentOps(op, comments) {
return historyCommentOps
}
-/**
- * Check some tracked changes assumptions:
- *
- * - Tracked changes can't be empty
- * - Tracked inserts can't overlap with another tracked change
- * - There can't be two tracked deletes at the same position
- * - Ranges should be ordered by position, deletes before inserts
- *
- * If any assumption isn't upheld, log a warning.
- *
- * @param {string} projectId
- * @param {string} docId
- * @param {TrackedChange[]} changes
- * @param {number} docLength
- */
-function sanityCheckTrackedChanges(projectId, docId, changes, docLength) {
- let lastDeletePos = -1 // allow a tracked delete at position 0
- let lastInsertEnd = 0
- let ok = true
- let badChangeIndex
- for (let i = 0; i < changes.length; i++) {
- const change = changes[i]
-
- const op = change.op
- if ('i' in op) {
- if (
- op.i.length === 0 ||
- op.p < lastDeletePos ||
- op.p < lastInsertEnd ||
- op.p < 0 ||
- op.p + op.i.length > docLength
- ) {
- ok = false
- badChangeIndex = i
- break
- }
- lastInsertEnd = op.p + op.i.length
- } else if ('d' in op) {
- if (
- op.d.length === 0 ||
- op.p <= lastDeletePos ||
- op.p < lastInsertEnd ||
- op.p < 0 ||
- op.p > docLength
- ) {
- ok = false
- badChangeIndex = i
- break
- }
- lastDeletePos = op.p
- if (lastDeletePos >= docLength) {
- badChangeIndex = i
- break
- }
- }
- }
-
- if (ok) {
- return
- }
-
- const changeRanges = []
- for (const change of changes) {
- if ('i' in change.op) {
- changeRanges.push({
- id: change.id,
- p: change.op.p,
- i: change.op.i.length,
- })
- } else if ('d' in change.op) {
- changeRanges.push({
- id: change.id,
- p: change.op.p,
- d: change.op.d.length,
- })
- }
- }
-
- logger.warn(
- { projectId, docId, changes: changeRanges, badChangeIndex },
- 'Malformed tracked changes detected'
- )
-}
-
module.exports = RangesManager
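The new `trackedDeleteRejectionOffset` covers the case where several tracked deletes sit at the same position and an undo rejects only one of them: the deletes scanned before the matching one push the history position forward. A worked example with invented ops, following the loop above:

```js
// Worked example for trackedDeleteRejectionOffset (ops invented).
// Two tracked deletes sit at position 5; the undo insert matches the second one.
const op = { i: 'bbb', p: 5, u: true } // undo insert
const changes = [
  { op: { d: 'aaaa', p: 5 } }, // tracked delete scanned first, 4 chars, not a match
  { op: { d: 'bbb', p: 5 } }, // the tracked delete actually being rejected
]

let hpos = op.p
let trackedDeleteRejectionOffset = 0
let trackedDeleteRejection = false

for (const change of changes) {
  if (change.op.p === op.p) {
    if (op.u && change.op.d.startsWith(op.i)) {
      trackedDeleteRejection = true
      hpos += trackedDeleteRejectionOffset // skip past the 4-char delete scanned first
      break
    } else {
      trackedDeleteRejectionOffset += change.op.d.length
    }
  }
}

console.log({ trackedDeleteRejection, hpos }) // { trackedDeleteRejection: true, hpos: 9 }
```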
diff --git a/services/document-updater/app/js/RealTimeRedisManager.js b/services/document-updater/app/js/RealTimeRedisManager.js
index 08bf132dec..2b67971c5c 100644
--- a/services/document-updater/app/js/RealTimeRedisManager.js
+++ b/services/document-updater/app/js/RealTimeRedisManager.js
@@ -49,7 +49,7 @@ const RealTimeRedisManager = {
MAX_OPS_PER_ITERATION,
-1
)
- return multi.exec(function (error, replys) {
+ multi.exec(function (error, replys) {
if (error != null) {
return callback(error)
}
@@ -80,7 +80,7 @@ const RealTimeRedisManager = {
},
getUpdatesLength(docId, callback) {
- return rclient.llen(Keys.pendingUpdates({ doc_id: docId }), callback)
+ rclient.llen(Keys.pendingUpdates({ doc_id: docId }), callback)
},
sendCanaryAppliedOp({ projectId, docId, op }) {
@@ -132,5 +132,5 @@ const RealTimeRedisManager = {
module.exports = RealTimeRedisManager
module.exports.promises = promisifyAll(RealTimeRedisManager, {
- without: ['sendData'],
+ without: ['sendCanaryAppliedOp', 'sendData'],
})
diff --git a/services/document-updater/app/js/RedisManager.js b/services/document-updater/app/js/RedisManager.js
index f8e97f38b4..7f86036427 100644
--- a/services/document-updater/app/js/RedisManager.js
+++ b/services/document-updater/app/js/RedisManager.js
@@ -48,6 +48,7 @@ const RedisManager = {
timer.done()
_callback(error)
}
+ const shareJSTextOT = Array.isArray(docLines)
const docLinesArray = docLines
docLines = JSON.stringify(docLines)
if (docLines.indexOf('\u0000') !== -1) {
@@ -60,7 +61,10 @@ const RedisManager = {
// Do an optimised size check on the docLines using the serialised
// length as an upper bound
const sizeBound = docLines.length
- if (docIsTooLarge(sizeBound, docLinesArray, Settings.max_doc_length)) {
+ if (
+ shareJSTextOT && // editor-core has a size check in TextOperation.apply and TextOperation.applyToLength.
+ docIsTooLarge(sizeBound, docLinesArray, Settings.max_doc_length)
+ ) {
const docSize = docLines.length
const err = new Error('blocking doc insert into redis: doc is too large')
logger.error({ projectId, docId, err, docSize }, err.message)
@@ -461,6 +465,7 @@ const RedisManager = {
if (appliedOps == null) {
appliedOps = []
}
+ const shareJSTextOT = Array.isArray(docLines)
RedisManager.getDocVersion(docId, (error, currentVersion) => {
if (error) {
return callback(error)
@@ -500,7 +505,10 @@ const RedisManager = {
// Do an optimised size check on the docLines using the serialised
// length as an upper bound
const sizeBound = newDocLines.length
- if (docIsTooLarge(sizeBound, docLines, Settings.max_doc_length)) {
+ if (
+ shareJSTextOT && // editor-core has a size check in TextOperation.apply and TextOperation.applyToLength.
+ docIsTooLarge(sizeBound, docLines, Settings.max_doc_length)
+ ) {
const err = new Error('blocking doc update: doc is too large')
const docSize = newDocLines.length
logger.error({ projectId, docId, err, docSize }, err.message)
diff --git a/services/document-updater/app/js/UpdateManager.js b/services/document-updater/app/js/UpdateManager.js
index b23522a2cb..e5df48575e 100644
--- a/services/document-updater/app/js/UpdateManager.js
+++ b/services/document-updater/app/js/UpdateManager.js
@@ -14,10 +14,11 @@ const DocumentManager = require('./DocumentManager')
const RangesManager = require('./RangesManager')
const SnapshotManager = require('./SnapshotManager')
const Profiler = require('./Profiler')
-const { isInsert, isDelete, getDocLength } = require('./Utils')
+const { isInsert, isDelete, getDocLength, computeDocHash } = require('./Utils')
+const HistoryOTUpdateManager = require('./HistoryOTUpdateManager')
/**
- * @import { DeleteOp, InsertOp, Op, Ranges, Update, HistoryUpdate } from "./types"
+ * @import { Ranges, Update, HistoryUpdate } from "./types"
*/
const UpdateManager = {
@@ -80,7 +81,11 @@ const UpdateManager = {
profile.log('getPendingUpdatesForDoc')
for (const update of updates) {
- await UpdateManager.applyUpdate(projectId, docId, update)
+ if (HistoryOTUpdateManager.isHistoryOTEditOperationUpdate(update)) {
+ await HistoryOTUpdateManager.applyUpdate(projectId, docId, update)
+ } else {
+ await UpdateManager.applyUpdate(projectId, docId, update)
+ }
profile.log('applyUpdate')
}
profile.log('async done').end()
@@ -110,12 +115,16 @@ const UpdateManager = {
pathname,
projectHistoryId,
historyRangesSupport,
+ type,
} = await DocumentManager.promises.getDoc(projectId, docId)
profile.log('getDoc')
if (lines == null || version == null) {
throw new Errors.NotFoundError(`document not found: ${docId}`)
}
+ if (type !== 'sharejs-text-ot') {
+ throw new Errors.OTTypeMismatchError(type, 'sharejs-text-ot')
+ }
const previousVersion = version
const incomingUpdateVersion = update.v
@@ -162,6 +171,7 @@ const UpdateManager = {
projectHistoryId,
lines,
ranges,
+ updatedDocLines,
historyRangesSupport
)
@@ -290,8 +300,9 @@ const UpdateManager = {
* @param {HistoryUpdate[]} updates
* @param {string} pathname
* @param {string} projectHistoryId
- * @param {string[]} lines
- * @param {Ranges} ranges
+ * @param {string[]} lines - document lines before updates were applied
+ * @param {Ranges} ranges - ranges before updates were applied
+ * @param {string[]} newLines - document lines after updates were applied
* @param {boolean} historyRangesSupport
*/
_adjustHistoryUpdatesMetadata(
@@ -300,6 +311,7 @@ const UpdateManager = {
projectHistoryId,
lines,
ranges,
+ newLines,
historyRangesSupport
) {
let docLength = getDocLength(lines)
@@ -363,6 +375,12 @@ const UpdateManager = {
delete update.meta.tc
}
}
+
+ if (historyRangesSupport && updates.length > 0) {
+ const lastUpdate = updates[updates.length - 1]
+ lastUpdate.meta ??= {}
+ lastUpdate.meta.doc_hash = computeDocHash(newLines)
+ }
},
}
diff --git a/services/document-updater/app/js/Utils.js b/services/document-updater/app/js/Utils.js
index 4e9e60ba06..a632cf32eb 100644
--- a/services/document-updater/app/js/Utils.js
+++ b/services/document-updater/app/js/Utils.js
@@ -1,4 +1,5 @@
// @ts-check
+const { createHash } = require('node:crypto')
const _ = require('lodash')
/**
@@ -79,6 +80,27 @@ function addTrackedDeletesToContent(content, trackedChanges) {
return result
}
+/**
+ * Compute the content hash for a doc
+ *
+ * This hash is sent to the history to validate updates.
+ *
+ * @param {string[]} lines
+ * @return {string} the doc hash
+ */
+function computeDocHash(lines) {
+ const hash = createHash('sha1')
+ if (lines.length > 0) {
+ for (const line of lines.slice(0, lines.length - 1)) {
+ hash.update(line)
+ hash.update('\n')
+ }
+ // The last line doesn't end with a newline
+ hash.update(lines[lines.length - 1])
+ }
+ return hash.digest('hex')
+}
+
/**
* checks if the given originOrSource should be treated as a source or origin
* TODO: remove this hack and remove all "source" references
@@ -102,5 +124,6 @@ module.exports = {
isComment,
addTrackedDeletesToContent,
getDocLength,
+ computeDocHash,
extractOriginOrSource,
}
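`computeDocHash` hashes the doc exactly as it is serialised, with '\n' between lines and no trailing newline, so it is equivalent to SHA-1 of `lines.join('\n')`; `_adjustHistoryUpdatesMetadata` attaches the digest as `meta.doc_hash` on the last update of each batch so project-history can validate the resulting content. A small check, assuming the require path is relative to services/document-updater:

```js
// Sanity-check sketch: computeDocHash(lines) === sha1(lines.join('\n')).
const { createHash } = require('node:crypto')
const { computeDocHash } = require('./app/js/Utils')

const lines = ['one', 'one and a half', 'two', 'three']
const expected = createHash('sha1').update(lines.join('\n')).digest('hex')
console.log(computeDocHash(lines) === expected) // true

// An empty doc hashes nothing at all, i.e. the SHA-1 of the empty string.
console.log(computeDocHash([]) === createHash('sha1').digest('hex')) // true
```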
diff --git a/services/document-updater/app/js/types.ts b/services/document-updater/app/js/types.ts
index d635ab31ca..851e62d8c8 100644
--- a/services/document-updater/app/js/types.ts
+++ b/services/document-updater/app/js/types.ts
@@ -1,12 +1,17 @@
import {
TrackingPropsRawData,
ClearTrackingPropsRawData,
+ RawEditOperation,
} from 'overleaf-editor-core/lib/types'
+export type OTType = 'sharejs-text-ot' | 'history-ot'
+
/**
* An update coming from the editor
*/
export type Update = {
+ dup?: boolean
+ dupIfSource?: string[]
doc: string
op: Op[]
v: number
@@ -18,6 +23,11 @@ export type Update = {
projectHistoryId?: string
}
+export type HistoryOTEditOperationUpdate = Omit<Update, 'op' | 'meta'> & {
+ op: RawEditOperation[]
+ meta: Update['meta'] & { source: string }
+}
+
export type Op = InsertOp | DeleteOp | CommentOp | RetainOp
export type InsertOp = {
@@ -84,6 +94,7 @@ export type HistoryUpdate = {
pathname?: string
doc_length?: number
history_doc_length?: number
+ doc_hash?: string
tc?: boolean
user_id?: string
}
diff --git a/services/document-updater/buildscript.txt b/services/document-updater/buildscript.txt
index dcd39704c8..98d10a8e55 100644
--- a/services/document-updater/buildscript.txt
+++ b/services/document-updater/buildscript.txt
@@ -4,6 +4,6 @@ document-updater
--env-add=
--env-pass-through=
--esmock-loader=False
---node-version=20.18.0
+--node-version=22.17.0
--public-repo=True
---script-version=4.5.0
+--script-version=4.7.0
diff --git a/services/document-updater/config/settings.defaults.js b/services/document-updater/config/settings.defaults.js
index 0cd29d325b..9ed59de6c4 100755
--- a/services/document-updater/config/settings.defaults.js
+++ b/services/document-updater/config/settings.defaults.js
@@ -184,4 +184,8 @@ module.exports = {
smoothingOffset: process.env.SMOOTHING_OFFSET || 1000, // milliseconds
gracefulShutdownDelayInMs:
parseInt(process.env.GRACEFUL_SHUTDOWN_DELAY_SECONDS ?? '10', 10) * 1000,
+
+ shortHistoryQueues: (process.env.SHORT_HISTORY_QUEUES || '')
+ .split(',')
+ .filter(s => !!s),
}
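`shortHistoryQueues` is parsed from a comma-separated environment variable, and `shouldFlushHistoryOps` now returns true immediately for any listed project, so its history ops are flushed on every batch instead of waiting for the usual thresholds. A usage sketch with invented project ids:

```js
// Sketch: parsing and effect of SHORT_HISTORY_QUEUES (project ids invented).
process.env.SHORT_HISTORY_QUEUES = 'project-a,project-b'

const shortHistoryQueues = (process.env.SHORT_HISTORY_QUEUES || '')
  .split(',')
  .filter(s => !!s)

console.log(shortHistoryQueues) // [ 'project-a', 'project-b' ]

// shouldFlushHistoryOps(projectId, ...) short-circuits for these projects:
//   if (Settings.shortHistoryQueues.includes(projectId)) return true
console.log(shortHistoryQueues.includes('project-a')) // true  -> flush every batch
console.log(shortHistoryQueues.includes('project-c')) // false -> normal thresholds apply
```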
diff --git a/services/document-updater/docker-compose.ci.yml b/services/document-updater/docker-compose.ci.yml
index 332a9710ca..c6ec24a84b 100644
--- a/services/document-updater/docker-compose.ci.yml
+++ b/services/document-updater/docker-compose.ci.yml
@@ -21,18 +21,22 @@ services:
ELASTIC_SEARCH_DSN: es:9200
REDIS_HOST: redis
QUEUES_REDIS_HOST: redis
+ HISTORY_REDIS_HOST: redis
ANALYTICS_QUEUES_REDIS_HOST: redis
MONGO_HOST: mongo
POSTGRES_HOST: postgres
MOCHA_GREP: ${MOCHA_GREP}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
+ volumes:
+ - ../../bin/shared/wait_for_it:/overleaf/bin/shared/wait_for_it
depends_on:
mongo:
- condition: service_healthy
+ condition: service_started
redis:
condition: service_healthy
user: node
+ entrypoint: /overleaf/bin/shared/wait_for_it mongo:27017 --timeout=0 --
command: npm run test:acceptance
@@ -44,16 +48,21 @@ services:
command: tar -czf /tmp/build/build.tar.gz --exclude=build.tar.gz --exclude-vcs .
user: root
redis:
- image: redis
+ image: redis:7.4.3
healthcheck:
test: ping="$$(redis-cli ping)" && [ "$$ping" = 'PONG' ]
interval: 1s
retries: 20
mongo:
- image: mongo:6.0.13
+ image: mongo:8.0.11
command: --replSet overleaf
- healthcheck:
- test: "mongosh --quiet localhost/test --eval 'quit(db.runCommand({ ping: 1 }).ok ? 0 : 1)'"
- interval: 1s
- retries: 20
+ volumes:
+ - ../../bin/shared/mongodb-init-replica-set.js:/docker-entrypoint-initdb.d/mongodb-init-replica-set.js
+ environment:
+ MONGO_INITDB_DATABASE: sharelatex
+ extra_hosts:
+ # Required when using the automatic database setup for initializing the
+ # replica set. This override is not needed when running the setup after
+ # starting up mongo.
+ - mongo:127.0.0.1
diff --git a/services/document-updater/docker-compose.yml b/services/document-updater/docker-compose.yml
index c871efe585..c1b23c11c5 100644
--- a/services/document-updater/docker-compose.yml
+++ b/services/document-updater/docker-compose.yml
@@ -6,7 +6,7 @@ version: "2.3"
services:
test_unit:
- image: node:20.18.0
+ image: node:22.17.0
volumes:
- .:/overleaf/services/document-updater
- ../../node_modules:/overleaf/node_modules
@@ -14,49 +14,58 @@ services:
working_dir: /overleaf/services/document-updater
environment:
MOCHA_GREP: ${MOCHA_GREP}
+ LOG_LEVEL: ${LOG_LEVEL:-}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
command: npm run --silent test:unit
user: node
test_acceptance:
- image: node:20.18.0
+ image: node:22.17.0
volumes:
- .:/overleaf/services/document-updater
- ../../node_modules:/overleaf/node_modules
- ../../libraries:/overleaf/libraries
+ - ../../bin/shared/wait_for_it:/overleaf/bin/shared/wait_for_it
working_dir: /overleaf/services/document-updater
environment:
ELASTIC_SEARCH_DSN: es:9200
REDIS_HOST: redis
+ HISTORY_REDIS_HOST: redis
QUEUES_REDIS_HOST: redis
ANALYTICS_QUEUES_REDIS_HOST: redis
MONGO_HOST: mongo
POSTGRES_HOST: postgres
MOCHA_GREP: ${MOCHA_GREP}
- LOG_LEVEL: ERROR
+ LOG_LEVEL: ${LOG_LEVEL:-}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
user: node
depends_on:
mongo:
- condition: service_healthy
+ condition: service_started
redis:
condition: service_healthy
+ entrypoint: /overleaf/bin/shared/wait_for_it mongo:27017 --timeout=0 --
command: npm run --silent test:acceptance
redis:
- image: redis
+ image: redis:7.4.3
healthcheck:
test: ping=$$(redis-cli ping) && [ "$$ping" = 'PONG' ]
interval: 1s
retries: 20
mongo:
- image: mongo:6.0.13
+ image: mongo:8.0.11
command: --replSet overleaf
- healthcheck:
- test: "mongosh --quiet localhost/test --eval 'quit(db.runCommand({ ping: 1 }).ok ? 0 : 1)'"
- interval: 1s
- retries: 20
+ volumes:
+ - ../../bin/shared/mongodb-init-replica-set.js:/docker-entrypoint-initdb.d/mongodb-init-replica-set.js
+ environment:
+ MONGO_INITDB_DATABASE: sharelatex
+ extra_hosts:
+ # Required when using the automatic database setup for initializing the
+ # replica set. This override is not needed when running the setup after
+ # starting up mongo.
+ - mongo:127.0.0.1
diff --git a/services/document-updater/package.json b/services/document-updater/package.json
index 5759bdae22..7d892689e9 100644
--- a/services/document-updater/package.json
+++ b/services/document-updater/package.json
@@ -30,10 +30,11 @@
"body-parser": "^1.20.3",
"bunyan": "^1.8.15",
"diff-match-patch": "overleaf/diff-match-patch#89805f9c671a77a263fc53461acd62aa7498f688",
- "express": "^4.21.0",
+ "express": "^4.21.2",
"lodash": "^4.17.21",
"minimist": "^1.2.8",
"mongodb-legacy": "6.1.3",
+ "overleaf-editor-core": "*",
"request": "^2.88.2",
"requestretry": "^7.1.0"
},
@@ -41,7 +42,7 @@
"chai": "^4.3.6",
"chai-as-promised": "^7.1.1",
"cluster-key-slot": "^1.0.5",
- "mocha": "^10.2.0",
+ "mocha": "^11.1.0",
"sandboxed-module": "^2.0.4",
"sinon": "^9.2.4",
"sinon-chai": "^3.7.0",
diff --git a/services/document-updater/scripts/check_redis_mongo_sync_state.js b/services/document-updater/scripts/check_redis_mongo_sync_state.js
index 08209400aa..51db47af4d 100644
--- a/services/document-updater/scripts/check_redis_mongo_sync_state.js
+++ b/services/document-updater/scripts/check_redis_mongo_sync_state.js
@@ -15,6 +15,7 @@ const request = require('requestretry').defaults({
retryDelay: 10,
})
+const ONLY_PROJECT_ID = process.env.ONLY_PROJECT_ID
const AUTO_FIX_VERSION_MISMATCH =
process.env.AUTO_FIX_VERSION_MISMATCH === 'true'
const AUTO_FIX_PARTIALLY_DELETED_DOC_METADATA =
@@ -319,10 +320,12 @@ async function processProject(projectId) {
* @return {Promise<{perIterationOutOfSync: number, done: boolean}>}
*/
async function scanOnce(processed, outOfSync) {
- const projectIds = await ProjectFlusher.promises.flushAllProjects({
- limit: LIMIT,
- dryRun: true,
- })
+ const projectIds = ONLY_PROJECT_ID
+ ? [ONLY_PROJECT_ID]
+ : await ProjectFlusher.promises.flushAllProjects({
+ limit: LIMIT,
+ dryRun: true,
+ })
let perIterationOutOfSync = 0
for (const projectId of projectIds) {
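The new `ONLY_PROJECT_ID` switch lets the sync-state check target a single project instead of scanning every project id in Redis. A sketch of the selection branch; 'project-a' is an invented placeholder:

```js
// Sketch of the ONLY_PROJECT_ID branch in scanOnce ('project-a' is a placeholder).
process.env.ONLY_PROJECT_ID = 'project-a'
const ONLY_PROJECT_ID = process.env.ONLY_PROJECT_ID

async function selectProjectIds(flushAllProjects) {
  return ONLY_PROJECT_ID
    ? [ONLY_PROJECT_ID] // skip the Redis scan and check just this project
    : await flushAllProjects({ limit: 1000, dryRun: true })
}

selectProjectIds(async () => {
  throw new Error('flushAllProjects is not called when ONLY_PROJECT_ID is set')
}).then(ids => console.log(ids)) // [ 'project-a' ]
```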
diff --git a/services/document-updater/scripts/flush_projects_with_no_history_id.js b/services/document-updater/scripts/flush_projects_with_no_history_id.js
new file mode 100644
index 0000000000..aa912b4b66
--- /dev/null
+++ b/services/document-updater/scripts/flush_projects_with_no_history_id.js
@@ -0,0 +1,211 @@
+// @ts-check
+
+const Settings = require('@overleaf/settings')
+const logger = require('@overleaf/logger')
+const RedisManager = require('../app/js/RedisManager')
+const minimist = require('minimist')
+const { db, ObjectId } = require('../app/js/mongodb')
+const ProjectManager = require('../app/js/ProjectManager')
+const OError = require('@overleaf/o-error')
+
+const docUpdaterKeys = Settings.redis.documentupdater.key_schema
+
+const rclient = RedisManager.rclient
+
+const { verbose, commit, ...args } = minimist(process.argv.slice(2), {
+ boolean: ['verbose', 'commit'],
+ string: ['batchSize'],
+ default: {
+ batchSize: '1000',
+ },
+})
+
+logger.logger.level(verbose ? 'debug' : 'warn')
+
+const batchSize = parseInt(args.batchSize, 10)
+
+/**
+ * @typedef {import('ioredis').Redis} Redis
+ */
+
+/**
+ *
+ * @param {string} key
+ * @return {string|void}
+ */
+function extractDocId(key) {
+ const matches = key.match(/ProjectHistoryId:\{(.*?)\}/)
+ if (matches) {
+ return matches[1]
+ }
+}
+
+/**
+ *
+ * @param {string} docId
+ * @return {Promise<{projectId: string, historyId: string}>}
+ */
+async function getHistoryId(docId) {
+ const doc = await db.docs.findOne(
+ { _id: new ObjectId(docId) },
+ { projection: { project_id: 1 }, readPreference: 'secondaryPreferred' }
+ )
+
+ if (!doc) {
+ throw new OError('Doc not present in mongo', { docId })
+ }
+
+ const project = await db.projects.findOne(
+ { _id: doc.project_id },
+ {
+ projection: { 'overleaf.history': 1 },
+ readPreference: 'secondaryPreferred',
+ }
+ )
+
+ if (!project?.overleaf?.history?.id) {
+ throw new OError('Project not present in mongo (or has no history id)', {
+ docId,
+ project,
+ doc,
+ })
+ }
+
+ return {
+ historyId: project?.overleaf?.history?.id,
+ projectId: doc.project_id.toString(),
+ }
+}
+
+/**
+ * @typedef {Object} UpdateableDoc
+ * @property {string} docId
+ * @property {string} projectId
+ * @property {string} historyId
+ */
+
+/**
+ *
+ * @param {Redis} node
+ * @param {Array<string>} docIds
+ * @return {Promise<Array<UpdateableDoc>>}
+ */
+async function findDocsWithMissingHistoryIds(node, docIds) {
+ const historyIds = await node.mget(
+ docIds.map(docId => docUpdaterKeys.projectHistoryId({ doc_id: docId }))
+ )
+
+ const results = []
+
+ for (const index in docIds) {
+ const historyId = historyIds[index]
+ const docId = docIds[index]
+ if (!historyId) {
+ try {
+ const { projectId, historyId } = await getHistoryId(docId)
+ results.push({ projectId, historyId, docId })
+ } catch (error) {
+ logger.warn(
+ { error },
+ 'Error gathering data for doc with missing history id'
+ )
+ }
+ }
+ }
+ return results
+}
+
+/**
+ *
+ * @param {Array<UpdateableDoc>} updates
+ * @return {Promise<void>}
+ */
+async function fixAndFlushProjects(updates) {
+ for (const update of updates) {
+ if (commit) {
+ try {
+ await rclient.set(
+ docUpdaterKeys.projectHistoryId({ doc_id: update.docId }),
+ update.historyId
+ )
+ logger.debug({ ...update }, 'Set history id in redis')
+ await ProjectManager.promises.flushAndDeleteProjectWithLocks(
+ update.projectId,
+ {}
+ )
+ logger.debug({ ...update }, 'Flushed project')
+ } catch (err) {
+ logger.error({ err, ...update }, 'Error fixing and flushing project')
+ }
+ } else {
+ logger.debug(
+ { ...update },
+ 'Would have set history id in redis and flushed'
+ )
+ }
+ }
+}
+
+/**
+ *
+ * @param {Array<Redis>} nodes
+ * @param {number} batchSize
+ * @return {Promise<void>}
+ */
+async function scanNodes(nodes, batchSize = 1000) {
+ let scanned = 0
+
+ for (const node of nodes) {
+ const stream = node.scanStream({
+ match: docUpdaterKeys.projectHistoryId({ doc_id: '*' }),
+ count: batchSize,
+ })
+
+ for await (const docKeys of stream) {
+ if (docKeys.length === 0) {
+ continue
+ }
+ stream.pause()
+ scanned += docKeys.length
+
+ const docIds = docKeys
+ .map((/** @type {string} */ docKey) => extractDocId(docKey))
+ .filter(Boolean)
+
+ try {
+ const updates = await findDocsWithMissingHistoryIds(node, docIds)
+ if (updates.length > 0) {
+ logger.info({ updates }, 'Found doc(s) with missing history ids')
+ await fixAndFlushProjects(updates)
+ }
+ } catch (error) {
+ logger.error({ docKeys }, 'Error processing batch')
+ } finally {
+ stream.resume()
+ }
+ }
+
+ logger.info({ scanned, server: node.serverInfo.role }, 'Scanned node')
+ }
+}
+
+async function main({ batchSize }) {
+ const nodes = (typeof rclient.nodes === 'function'
+ ? rclient.nodes('master')
+ : undefined) || [rclient]
+ await scanNodes(nodes, batchSize)
+}
+
+let code = 0
+
+main({ batchSize })
+ .then(() => {
+ logger.info({}, 'done')
+ })
+ .catch(error => {
+ logger.error({ error }, 'error')
+ code = 1
+ })
+ .finally(() => {
+ rclient.quit().then(() => process.exit(code))
+ })
diff --git a/services/document-updater/test/acceptance/js/ApplyingUpdatesToADocTests.js b/services/document-updater/test/acceptance/js/ApplyingUpdatesToADocTests.js
index 8de7f091a8..39ec6c2ac7 100644
--- a/services/document-updater/test/acceptance/js/ApplyingUpdatesToADocTests.js
+++ b/services/document-updater/test/acceptance/js/ApplyingUpdatesToADocTests.js
@@ -16,27 +16,36 @@ const DocUpdaterClient = require('./helpers/DocUpdaterClient')
const DocUpdaterApp = require('./helpers/DocUpdaterApp')
describe('Applying updates to a doc', function () {
- before(function (done) {
+ beforeEach(function (done) {
+ sinon.spy(MockWebApi, 'getDocument')
this.lines = ['one', 'two', 'three']
this.version = 42
this.op = {
i: 'one and a half\n',
p: 4,
}
+ this.project_id = DocUpdaterClient.randomId()
+ this.doc_id = DocUpdaterClient.randomId()
this.update = {
doc: this.doc_id,
op: [this.op],
v: this.version,
}
+ this.historyOTUpdate = {
+ doc: this.doc_id,
+ op: [{ textOperation: [4, 'one and a half\n', 9] }],
+ v: this.version,
+ meta: { source: 'random-publicId' },
+ }
this.result = ['one', 'one and a half', 'two', 'three']
DocUpdaterApp.ensureRunning(done)
})
+ afterEach(function () {
+ sinon.restore()
+ })
describe('when the document is not loaded', function () {
- before(function (done) {
- this.project_id = DocUpdaterClient.randomId()
- this.doc_id = DocUpdaterClient.randomId()
- sinon.spy(MockWebApi, 'getDocument')
+ beforeEach(function (done) {
this.startTime = Date.now()
MockWebApi.insertDoc(this.project_id, this.doc_id, {
lines: this.lines,
@@ -50,15 +59,25 @@ describe('Applying updates to a doc', function () {
if (error != null) {
throw error
}
- setTimeout(done, 200)
+ setTimeout(() => {
+ rclientProjectHistory.get(
+ ProjectHistoryKeys.projectHistoryFirstOpTimestamp({
+ project_id: this.project_id,
+ }),
+ (error, result) => {
+ if (error != null) {
+ throw error
+ }
+ result = parseInt(result, 10)
+ this.firstOpTimestamp = result
+ done()
+ }
+ )
+ }, 200)
}
)
})
- after(function () {
- MockWebApi.getDocument.restore()
- })
-
it('should load the document from the web API', function () {
MockWebApi.getDocument
.calledWith(this.project_id, this.doc_id)
@@ -92,28 +111,44 @@ describe('Applying updates to a doc', function () {
)
})
- it('should set the first op timestamp', function (done) {
- rclientProjectHistory.get(
- ProjectHistoryKeys.projectHistoryFirstOpTimestamp({
- project_id: this.project_id,
- }),
- (error, result) => {
+ it('should set the first op timestamp', function () {
+ this.firstOpTimestamp.should.be.within(this.startTime, Date.now())
+ })
+
+ it('should yield last updated time', function (done) {
+ DocUpdaterClient.getProjectLastUpdatedAt(
+ this.project_id,
+ (error, res, body) => {
if (error != null) {
throw error
}
- result = parseInt(result, 10)
- result.should.be.within(this.startTime, Date.now())
- this.firstOpTimestamp = result
+ res.statusCode.should.equal(200)
+ body.lastUpdatedAt.should.be.within(this.startTime, Date.now())
+ done()
+ }
+ )
+ })
+
+ it('should yield no last updated time for another project', function (done) {
+ DocUpdaterClient.getProjectLastUpdatedAt(
+ DocUpdaterClient.randomId(),
+ (error, res, body) => {
+ if (error != null) {
+ throw error
+ }
+ res.statusCode.should.equal(200)
+ body.should.deep.equal({})
done()
}
)
})
describe('when sending another update', function () {
- before(function (done) {
- this.timeout = 10000
- this.second_update = Object.create(this.update)
+ beforeEach(function (done) {
+ this.timeout(10000)
+ this.second_update = Object.assign({}, this.update)
this.second_update.v = this.version + 1
+ this.secondStartTime = Date.now()
DocUpdaterClient.sendUpdate(
this.project_id,
this.doc_id,
@@ -127,6 +162,24 @@ describe('Applying updates to a doc', function () {
)
})
+ it('should update the doc', function (done) {
+ DocUpdaterClient.getDoc(
+ this.project_id,
+ this.doc_id,
+ (error, res, doc) => {
+ if (error) done(error)
+ doc.lines.should.deep.equal([
+ 'one',
+ 'one and a half',
+ 'one and a half',
+ 'two',
+ 'three',
+ ])
+ done()
+ }
+ )
+ })
+
it('should not change the first op timestamp', function (done) {
rclientProjectHistory.get(
ProjectHistoryKeys.projectHistoryFirstOpTimestamp({
@@ -142,14 +195,357 @@ describe('Applying updates to a doc', function () {
}
)
})
+
+ it('should yield last updated time', function (done) {
+ DocUpdaterClient.getProjectLastUpdatedAt(
+ this.project_id,
+ (error, res, body) => {
+ if (error != null) {
+ throw error
+ }
+ res.statusCode.should.equal(200)
+ body.lastUpdatedAt.should.be.within(
+ this.secondStartTime,
+ Date.now()
+ )
+ done()
+ }
+ )
+ })
+ })
+
+ describe('when another client is sending a concurrent update', function () {
+ beforeEach(function (done) {
+ this.timeout(10000)
+ this.otherUpdate = {
+ doc: this.doc_id,
+ op: [{ p: 8, i: 'two and a half\n' }],
+ v: this.version,
+ meta: { source: 'other-random-publicId' },
+ }
+ this.secondStartTime = Date.now()
+ DocUpdaterClient.sendUpdate(
+ this.project_id,
+ this.doc_id,
+ this.otherUpdate,
+ error => {
+ if (error != null) {
+ throw error
+ }
+ setTimeout(done, 200)
+ }
+ )
+ })
+
+ it('should update the doc', function (done) {
+ DocUpdaterClient.getDoc(
+ this.project_id,
+ this.doc_id,
+ (error, res, doc) => {
+ if (error) done(error)
+ doc.lines.should.deep.equal([
+ 'one',
+ 'one and a half',
+ 'two',
+ 'two and a half',
+ 'three',
+ ])
+ done()
+ }
+ )
+ })
+
+ it('should not change the first op timestamp', function (done) {
+ rclientProjectHistory.get(
+ ProjectHistoryKeys.projectHistoryFirstOpTimestamp({
+ project_id: this.project_id,
+ }),
+ (error, result) => {
+ if (error != null) {
+ throw error
+ }
+ result = parseInt(result, 10)
+ result.should.equal(this.firstOpTimestamp)
+ done()
+ }
+ )
+ })
+
+ it('should yield last updated time', function (done) {
+ DocUpdaterClient.getProjectLastUpdatedAt(
+ this.project_id,
+ (error, res, body) => {
+ if (error != null) {
+ throw error
+ }
+ res.statusCode.should.equal(200)
+ body.lastUpdatedAt.should.be.within(
+ this.secondStartTime,
+ Date.now()
+ )
+ done()
+ }
+ )
+ })
+ })
+ })
+
+ describe('when the document is not loaded (history-ot)', function () {
+ beforeEach(function (done) {
+ this.startTime = Date.now()
+ MockWebApi.insertDoc(this.project_id, this.doc_id, {
+ lines: this.lines,
+ version: this.version,
+ otMigrationStage: 1,
+ })
+ DocUpdaterClient.sendUpdate(
+ this.project_id,
+ this.doc_id,
+ this.historyOTUpdate,
+ error => {
+ if (error != null) {
+ throw error
+ }
+ setTimeout(() => {
+ rclientProjectHistory.get(
+ ProjectHistoryKeys.projectHistoryFirstOpTimestamp({
+ project_id: this.project_id,
+ }),
+ (error, result) => {
+ if (error != null) {
+ throw error
+ }
+ result = parseInt(result, 10)
+ this.firstOpTimestamp = result
+ done()
+ }
+ )
+ }, 200)
+ }
+ )
+ })
+
+ it('should load the document from the web API', function () {
+ MockWebApi.getDocument
+ .calledWith(this.project_id, this.doc_id)
+ .should.equal(true)
+ })
+
+ it('should update the doc', function (done) {
+ DocUpdaterClient.getDoc(
+ this.project_id,
+ this.doc_id,
+ (error, res, doc) => {
+ if (error) done(error)
+ doc.lines.should.deep.equal(this.result)
+ done()
+ }
+ )
+ })
+
+ it('should push the applied updates to the project history changes api', function (done) {
+ rclientProjectHistory.lrange(
+ ProjectHistoryKeys.projectHistoryOps({ project_id: this.project_id }),
+ 0,
+ -1,
+ (error, updates) => {
+ if (error != null) {
+ throw error
+ }
+ JSON.parse(updates[0]).op.should.deep.equal(this.historyOTUpdate.op)
+ JSON.parse(updates[0]).meta.pathname.should.equal('/a/b/c.tex')
+
+ done()
+ }
+ )
+ })
+
+ it('should set the first op timestamp', function () {
+ this.firstOpTimestamp.should.be.within(this.startTime, Date.now())
+ })
+
+ it('should yield last updated time', function (done) {
+ DocUpdaterClient.getProjectLastUpdatedAt(
+ this.project_id,
+ (error, res, body) => {
+ if (error != null) {
+ throw error
+ }
+ res.statusCode.should.equal(200)
+ body.lastUpdatedAt.should.be.within(this.startTime, Date.now())
+ done()
+ }
+ )
+ })
+
+ it('should yield no last updated time for another project', function (done) {
+ DocUpdaterClient.getProjectLastUpdatedAt(
+ DocUpdaterClient.randomId(),
+ (error, res, body) => {
+ if (error != null) {
+ throw error
+ }
+ res.statusCode.should.equal(200)
+ body.should.deep.equal({})
+ done()
+ }
+ )
+ })
+
+ describe('when sending another update', function () {
+ beforeEach(function (done) {
+ this.timeout(10000)
+ this.second_update = Object.assign({}, this.historyOTUpdate)
+ this.second_update.op = [
+ {
+ textOperation: [4, 'one and a half\n', 24],
+ },
+ ]
+ this.second_update.v = this.version + 1
+ this.secondStartTime = Date.now()
+ DocUpdaterClient.sendUpdate(
+ this.project_id,
+ this.doc_id,
+ this.second_update,
+ error => {
+ if (error != null) {
+ throw error
+ }
+ setTimeout(done, 200)
+ }
+ )
+ })
+
+ it('should update the doc', function (done) {
+ DocUpdaterClient.getDoc(
+ this.project_id,
+ this.doc_id,
+ (error, res, doc) => {
+ if (error) done(error)
+ doc.lines.should.deep.equal([
+ 'one',
+ 'one and a half',
+ 'one and a half',
+ 'two',
+ 'three',
+ ])
+ done()
+ }
+ )
+ })
+
+ it('should not change the first op timestamp', function (done) {
+ rclientProjectHistory.get(
+ ProjectHistoryKeys.projectHistoryFirstOpTimestamp({
+ project_id: this.project_id,
+ }),
+ (error, result) => {
+ if (error != null) {
+ throw error
+ }
+ result = parseInt(result, 10)
+ result.should.equal(this.firstOpTimestamp)
+ done()
+ }
+ )
+ })
+
+ it('should yield last updated time', function (done) {
+ DocUpdaterClient.getProjectLastUpdatedAt(
+ this.project_id,
+ (error, res, body) => {
+ if (error != null) {
+ throw error
+ }
+ res.statusCode.should.equal(200)
+ body.lastUpdatedAt.should.be.within(
+ this.secondStartTime,
+ Date.now()
+ )
+ done()
+ }
+ )
+ })
+ })
+
+ describe('when another client is sending a concurrent update', function () {
+ beforeEach(function (done) {
+ this.timeout(10000)
+ this.otherUpdate = {
+ doc: this.doc_id,
+ op: [{ textOperation: [8, 'two and a half\n', 5] }],
+ v: this.version,
+ meta: { source: 'other-random-publicId' },
+ }
+ this.secondStartTime = Date.now()
+ DocUpdaterClient.sendUpdate(
+ this.project_id,
+ this.doc_id,
+ this.otherUpdate,
+ error => {
+ if (error != null) {
+ throw error
+ }
+ setTimeout(done, 200)
+ }
+ )
+ })
+
+ it('should update the doc', function (done) {
+ DocUpdaterClient.getDoc(
+ this.project_id,
+ this.doc_id,
+ (error, res, doc) => {
+ if (error) return done(error)
+ doc.lines.should.deep.equal([
+ 'one',
+ 'one and a half',
+ 'two',
+ 'two and a half',
+ 'three',
+ ])
+ done()
+ }
+ )
+ })
+
+ it('should not change the first op timestamp', function (done) {
+ rclientProjectHistory.get(
+ ProjectHistoryKeys.projectHistoryFirstOpTimestamp({
+ project_id: this.project_id,
+ }),
+ (error, result) => {
+ if (error != null) {
+ throw error
+ }
+ result = parseInt(result, 10)
+ result.should.equal(this.firstOpTimestamp)
+ done()
+ }
+ )
+ })
+
+ it('should yield last updated time', function (done) {
+ DocUpdaterClient.getProjectLastUpdatedAt(
+ this.project_id,
+ (error, res, body) => {
+ if (error != null) {
+ throw error
+ }
+ res.statusCode.should.equal(200)
+ body.lastUpdatedAt.should.be.within(
+ this.secondStartTime,
+ Date.now()
+ )
+ done()
+ }
+ )
+ })
})
})
describe('when the document is loaded', function () {
- before(function (done) {
- this.project_id = DocUpdaterClient.randomId()
- this.doc_id = DocUpdaterClient.randomId()
-
+ beforeEach(function (done) {
MockWebApi.insertDoc(this.project_id, this.doc_id, {
lines: this.lines,
version: this.version,
@@ -158,7 +554,7 @@ describe('Applying updates to a doc', function () {
if (error != null) {
throw error
}
- sinon.spy(MockWebApi, 'getDocument')
+ sinon.resetHistory()
DocUpdaterClient.sendUpdate(
this.project_id,
this.doc_id,
@@ -173,10 +569,6 @@ describe('Applying updates to a doc', function () {
})
})
- after(function () {
- MockWebApi.getDocument.restore()
- })
-
it('should not need to call the web api', function () {
MockWebApi.getDocument.called.should.equal(false)
})
@@ -208,10 +600,7 @@ describe('Applying updates to a doc', function () {
})
describe('when the document is loaded and is using project-history only', function () {
- before(function (done) {
- this.project_id = DocUpdaterClient.randomId()
- this.doc_id = DocUpdaterClient.randomId()
-
+ beforeEach(function (done) {
MockWebApi.insertDoc(this.project_id, this.doc_id, {
lines: this.lines,
version: this.version,
@@ -220,7 +609,7 @@ describe('Applying updates to a doc', function () {
if (error != null) {
throw error
}
- sinon.spy(MockWebApi, 'getDocument')
+ sinon.resetHistory()
DocUpdaterClient.sendUpdate(
this.project_id,
this.doc_id,
@@ -235,10 +624,6 @@ describe('Applying updates to a doc', function () {
})
})
- after(function () {
- MockWebApi.getDocument.restore()
- })
-
it('should update the doc', function (done) {
DocUpdaterClient.getDoc(
this.project_id,
@@ -265,11 +650,61 @@ describe('Applying updates to a doc', function () {
})
})
+ describe('when the document is loaded (history-ot)', function () {
+ beforeEach(function (done) {
+ MockWebApi.insertDoc(this.project_id, this.doc_id, {
+ lines: this.lines,
+ version: this.version,
+ otMigrationStage: 1,
+ })
+ DocUpdaterClient.preloadDoc(this.project_id, this.doc_id, error => {
+ if (error != null) {
+ throw error
+ }
+ DocUpdaterClient.sendUpdate(
+ this.project_id,
+ this.doc_id,
+ this.historyOTUpdate,
+ error => {
+ if (error != null) {
+ throw error
+ }
+ setTimeout(done, 200)
+ }
+ )
+ })
+ })
+
+ it('should update the doc', function (done) {
+ DocUpdaterClient.getDoc(
+ this.project_id,
+ this.doc_id,
+ (error, res, doc) => {
+ if (error) return done(error)
+ doc.lines.should.deep.equal(this.result)
+ done()
+ }
+ )
+ })
+
+ it('should push the applied updates to the project history changes api', function (done) {
+ rclientProjectHistory.lrange(
+ ProjectHistoryKeys.projectHistoryOps({ project_id: this.project_id }),
+ 0,
+ -1,
+ (error, updates) => {
+ if (error) return done(error)
+ JSON.parse(updates[0]).op.should.deep.equal(this.historyOTUpdate.op)
+ JSON.parse(updates[0]).meta.pathname.should.equal('/a/b/c.tex')
+ done()
+ }
+ )
+ })
+ })
+
describe('when the document has been deleted', function () {
describe('when the ops come in a single linear order', function () {
- before(function (done) {
- this.project_id = DocUpdaterClient.randomId()
- this.doc_id = DocUpdaterClient.randomId()
+ beforeEach(function (done) {
const lines = ['', '', '']
MockWebApi.insertDoc(this.project_id, this.doc_id, {
lines,
@@ -289,54 +724,49 @@ describe('Applying updates to a doc', function () {
{ doc_id: this.doc_id, v: 10, op: [{ i: 'd', p: 10 }] },
]
this.my_result = ['hello world', '', '']
- done()
- })
-
- it('should be able to continue applying updates when the project has been deleted', function (done) {
- let update
const actions = []
- for (update of this.updates.slice(0, 6)) {
- ;(update => {
- actions.push(callback =>
- DocUpdaterClient.sendUpdate(
- this.project_id,
- this.doc_id,
- update,
- callback
- )
+ for (const update of this.updates.slice(0, 6)) {
+ actions.push(callback =>
+ DocUpdaterClient.sendUpdate(
+ this.project_id,
+ this.doc_id,
+ update,
+ callback
)
- })(update)
+ )
}
actions.push(callback =>
DocUpdaterClient.deleteDoc(this.project_id, this.doc_id, callback)
)
- for (update of this.updates.slice(6)) {
- ;(update => {
- actions.push(callback =>
- DocUpdaterClient.sendUpdate(
- this.project_id,
- this.doc_id,
- update,
- callback
- )
+ for (const update of this.updates.slice(6)) {
+ actions.push(callback =>
+ DocUpdaterClient.sendUpdate(
+ this.project_id,
+ this.doc_id,
+ update,
+ callback
)
- })(update)
+ )
}
- async.series(actions, error => {
- if (error != null) {
- throw error
+ // getDoc processes any outstanding updates before returning, so the assertions below see the final doc
+ actions.push(cb =>
+ DocUpdaterClient.getDoc(this.project_id, this.doc_id, cb)
+ )
+
+ async.series(actions, done)
+ })
+
+ it('should be able to continue applying updates when the project has been deleted', function (done) {
+ DocUpdaterClient.getDoc(
+ this.project_id,
+ this.doc_id,
+ (error, res, doc) => {
+ if (error) return done(error)
+ doc.lines.should.deep.equal(this.my_result)
+ done()
}
- DocUpdaterClient.getDoc(
- this.project_id,
- this.doc_id,
- (error, res, doc) => {
- if (error) return done(error)
- doc.lines.should.deep.equal(this.my_result)
- done()
- }
- )
- })
+ )
})
it('should store the doc ops in the correct order', function (done) {
@@ -358,9 +788,7 @@ describe('Applying updates to a doc', function () {
})
describe('when older ops come in after the delete', function () {
- before(function (done) {
- this.project_id = DocUpdaterClient.randomId()
- this.doc_id = DocUpdaterClient.randomId()
+ beforeEach(function (done) {
const lines = ['', '', '']
MockWebApi.insertDoc(this.project_id, this.doc_id, {
lines,
@@ -428,11 +856,9 @@ describe('Applying updates to a doc', function () {
})
describe('with a broken update', function () {
- before(function (done) {
- this.project_id = DocUpdaterClient.randomId()
- this.doc_id = DocUpdaterClient.randomId()
+ beforeEach(function (done) {
this.broken_update = {
- doc_id: this.doc_id,
+ doc: this.doc_id,
v: this.version,
op: [{ d: 'not the correct content', p: 0 }],
}
@@ -482,10 +908,162 @@ describe('Applying updates to a doc', function () {
})
})
+ describe('with a broken update (history-ot)', function () {
+ beforeEach(function (done) {
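+ // The textOperation declares a base length of 100 (retain 99, delete 1), which does not match the doc content, so the update should be rejected.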
+ this.broken_update = {
+ doc: this.doc_id,
+ v: this.version,
+ op: [{ textOperation: [99, -1] }],
+ meta: { source: '42' },
+ }
+ MockWebApi.insertDoc(this.project_id, this.doc_id, {
+ lines: this.lines,
+ version: this.version,
+ otMigrationStage: 1,
+ })
+
+ DocUpdaterClient.subscribeToAppliedOps(
+ (this.messageCallback = sinon.stub())
+ )
+
+ DocUpdaterClient.sendUpdate(
+ this.project_id,
+ this.doc_id,
+ this.broken_update,
+ error => {
+ if (error != null) {
+ throw error
+ }
+ setTimeout(done, 200)
+ }
+ )
+ })
+
+ it('should not update the doc', function (done) {
+ DocUpdaterClient.getDoc(
+ this.project_id,
+ this.doc_id,
+ (error, res, doc) => {
+ if (error) return done(error)
+ doc.lines.should.deep.equal(this.lines)
+ done()
+ }
+ )
+ })
+
+ it('should send a message with an error', function () {
+ this.messageCallback.called.should.equal(true)
+ const [channel, message] = this.messageCallback.args[0]
+ channel.should.equal('applied-ops')
+ JSON.parse(message).should.deep.include({
+ project_id: this.project_id,
+ doc_id: this.doc_id,
+ error:
+ "The operation's base length must be equal to the string's length.",
+ })
+ })
+ })
+
+ describe('when mixing ot types (sharejs-text-ot -> history-ot)', function () {
+ beforeEach(function (done) {
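+ // The doc is still on sharejs-text-ot (stage 0), so a history-OT update should be rejected with an 'ot type mismatch' error.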
+ MockWebApi.insertDoc(this.project_id, this.doc_id, {
+ lines: this.lines,
+ version: this.version,
+ otMigrationStage: 0,
+ })
+
+ DocUpdaterClient.subscribeToAppliedOps(
+ (this.messageCallback = sinon.stub())
+ )
+
+ DocUpdaterClient.sendUpdate(
+ this.project_id,
+ this.doc_id,
+ this.historyOTUpdate,
+ error => {
+ if (error != null) {
+ throw error
+ }
+ setTimeout(done, 200)
+ }
+ )
+ })
+
+ it('should not update the doc', function (done) {
+ DocUpdaterClient.getDoc(
+ this.project_id,
+ this.doc_id,
+ (error, res, doc) => {
+ if (error) return done(error)
+ doc.lines.should.deep.equal(this.lines)
+ done()
+ }
+ )
+ })
+
+ it('should send a message with an error', function () {
+ this.messageCallback.called.should.equal(true)
+ const [channel, message] = this.messageCallback.args[0]
+ channel.should.equal('applied-ops')
+ JSON.parse(message).should.deep.include({
+ project_id: this.project_id,
+ doc_id: this.doc_id,
+ error: 'ot type mismatch',
+ })
+ })
+ })
+
+ describe('when mixing ot types (history-ot -> sharejs-text-ot)', function () {
+ beforeEach(function (done) {
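+ // The doc has been migrated to history-OT (stage 1), so a sharejs-text-ot update should be rejected with an 'ot type mismatch' error.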
+ MockWebApi.insertDoc(this.project_id, this.doc_id, {
+ lines: this.lines,
+ version: this.version,
+ otMigrationStage: 1,
+ })
+
+ DocUpdaterClient.subscribeToAppliedOps(
+ (this.messageCallback = sinon.stub())
+ )
+
+ DocUpdaterClient.sendUpdate(
+ this.project_id,
+ this.doc_id,
+ this.update,
+ error => {
+ if (error != null) {
+ throw error
+ }
+ setTimeout(done, 200)
+ }
+ )
+ })
+
+ it('should not update the doc', function (done) {
+ DocUpdaterClient.getDoc(
+ this.project_id,
+ this.doc_id,
+ (error, res, doc) => {
+ if (error) return done(error)
+ doc.lines.should.deep.equal(this.lines)
+ done()
+ }
+ )
+ })
+
+ it('should send a message with an error', function () {
+ this.messageCallback.called.should.equal(true)
+ const [channel, message] = this.messageCallback.args[0]
+ channel.should.equal('applied-ops')
+ JSON.parse(message).should.deep.include({
+ project_id: this.project_id,
+ doc_id: this.doc_id,
+ error: 'ot type mismatch',
+ })
+ })
+ })
+
describe('when there is no version in Mongo', function () {
- before(function (done) {
- this.project_id = DocUpdaterClient.randomId()
- this.doc_id = DocUpdaterClient.randomId()
+ beforeEach(function (done) {
MockWebApi.insertDoc(this.project_id, this.doc_id, {
lines: this.lines,
})
@@ -522,9 +1100,7 @@ describe('Applying updates to a doc', function () {
})
describe('when the sending duplicate ops', function () {
- before(function (done) {
- this.project_id = DocUpdaterClient.randomId()
- this.doc_id = DocUpdaterClient.randomId()
+ beforeEach(function (done) {
MockWebApi.insertDoc(this.project_id, this.doc_id, {
lines: this.lines,
version: this.version,
@@ -606,12 +1182,88 @@ describe('Applying updates to a doc', function () {
})
})
+ describe('when sending duplicate ops (history-ot)', function () {
+ beforeEach(function (done) {
+ MockWebApi.insertDoc(this.project_id, this.doc_id, {
+ lines: this.lines,
+ version: this.version,
+ otMigrationStage: 1,
+ })
+
+ DocUpdaterClient.subscribeToAppliedOps(
+ (this.messageCallback = sinon.stub())
+ )
+
+ // Both clients send the same insert; the second op carries dupIfSource for the same source, so it is detected as a duplicate and applied as a no-op.
+ DocUpdaterClient.sendUpdate(
+ this.project_id,
+ this.doc_id,
+ {
+ doc: this.doc_id,
+ op: [{ textOperation: [4, 'one and a half\n', 9] }],
+ v: this.version,
+ meta: {
+ source: 'ikHceq3yfAdQYzBo4-xZ',
+ },
+ },
+ error => {
+ if (error != null) {
+ throw error
+ }
+ setTimeout(() => {
+ DocUpdaterClient.sendUpdate(
+ this.project_id,
+ this.doc_id,
+ {
+ doc: this.doc_id,
+ op: [
+ {
+ textOperation: [4, 'one and a half\n', 9],
+ },
+ ],
+ v: this.version,
+ dupIfSource: ['ikHceq3yfAdQYzBo4-xZ'],
+ meta: {
+ source: 'ikHceq3yfAdQYzBo4-xZ',
+ },
+ },
+ error => {
+ if (error != null) {
+ throw error
+ }
+ setTimeout(done, 200)
+ }
+ )
+ }, 200)
+ }
+ )
+ })
+
+ it('should update the doc', function (done) {
+ DocUpdaterClient.getDoc(
+ this.project_id,
+ this.doc_id,
+ (error, res, doc) => {
+ if (error) return done(error)
+ doc.lines.should.deep.equal(this.result)
+ done()
+ }
+ )
+ })
+
+ it('should return a message about duplicate ops', function () {
+ this.messageCallback.calledTwice.should.equal(true)
+ this.messageCallback.args[0][0].should.equal('applied-ops')
+ expect(JSON.parse(this.messageCallback.args[0][1]).op.dup).to.be.undefined
+ this.messageCallback.args[1][0].should.equal('applied-ops')
+ expect(JSON.parse(this.messageCallback.args[1][1]).op.dup).to.equal(true)
+ })
+ })
+
describe('when sending updates for a non-existing doc id', function () {
- before(function (done) {
- this.project_id = DocUpdaterClient.randomId()
- this.doc_id = DocUpdaterClient.randomId()
+ beforeEach(function (done) {
this.non_existing = {
- doc_id: this.doc_id,
+ doc: this.doc_id,
v: this.version,
op: [{ d: 'content', p: 0 }],
}
diff --git a/services/document-updater/test/acceptance/js/SettingADocumentTests.js b/services/document-updater/test/acceptance/js/SettingADocumentTests.js
index 5b0c4ab281..e1bc54dc90 100644
--- a/services/document-updater/test/acceptance/js/SettingADocumentTests.js
+++ b/services/document-updater/test/acceptance/js/SettingADocumentTests.js
@@ -196,6 +196,167 @@ describe('Setting a document', function () {
})
})
+ describe('when the updated doc exists in the doc updater (history-ot)', function () {
+ before(function (done) {
+ numberOfReceivedUpdates = 0
+ this.project_id = DocUpdaterClient.randomId()
+ this.doc_id = DocUpdaterClient.randomId()
+ this.historyOTUpdate = {
+ doc: this.doc_id,
+ op: [{ textOperation: [4, 'one and a half\n', 9] }],
+ v: this.version,
+ meta: { source: 'random-publicId' },
+ }
+ MockWebApi.insertDoc(this.project_id, this.doc_id, {
+ lines: this.lines,
+ version: this.version,
+ otMigrationStage: 1,
+ })
+ DocUpdaterClient.preloadDoc(this.project_id, this.doc_id, error => {
+ if (error) {
+ throw error
+ }
+ DocUpdaterClient.sendUpdate(
+ this.project_id,
+ this.doc_id,
+ this.historyOTUpdate,
+ error => {
+ if (error) {
+ throw error
+ }
+ setTimeout(() => {
+ DocUpdaterClient.setDocLines(
+ this.project_id,
+ this.doc_id,
+ this.newLines,
+ this.source,
+ this.user_id,
+ false,
+ (error, res, body) => {
+ if (error) {
+ return done(error)
+ }
+ this.statusCode = res.statusCode
+ this.body = body
+ done()
+ }
+ )
+ }, 200)
+ }
+ )
+ })
+ })
+
+ after(function () {
+ MockProjectHistoryApi.flushProject.resetHistory()
+ MockWebApi.setDocument.resetHistory()
+ })
+
+ it('should return a 200 status code', function () {
+ this.statusCode.should.equal(200)
+ })
+
+ it('should emit two updates (from sendUpdate and setDocLines)', function () {
+ expect(numberOfReceivedUpdates).to.equal(2)
+ })
+
+ it('should send the updated doc lines and version to the web api', function () {
+ MockWebApi.setDocument
+ .calledWith(this.project_id, this.doc_id, this.newLines)
+ .should.equal(true)
+ })
+
+ it('should update the lines in the doc updater', function (done) {
+ DocUpdaterClient.getDoc(
+ this.project_id,
+ this.doc_id,
+ (error, res, doc) => {
+ if (error) {
+ return done(error)
+ }
+ doc.lines.should.deep.equal(this.newLines)
+ done()
+ }
+ )
+ })
+
+ it('should bump the version in the doc updater', function (done) {
+ DocUpdaterClient.getDoc(
+ this.project_id,
+ this.doc_id,
+ (error, res, doc) => {
+ if (error) {
+ return done(error)
+ }
+ doc.version.should.equal(this.version + 2)
+ done()
+ }
+ )
+ })
+
+ it('should leave the document in redis', function (done) {
+ docUpdaterRedis.get(
+ Keys.docLines({ doc_id: this.doc_id }),
+ (error, lines) => {
+ if (error) {
+ throw error
+ }
+ expect(JSON.parse(lines)).to.deep.equal({
+ content: this.newLines.join('\n'),
+ })
+ done()
+ }
+ )
+ })
+
+ it('should return the mongo rev in the json response', function () {
+ this.body.should.deep.equal({ rev: '123' })
+ })
+
+ describe('when doc has the same contents', function () {
+ beforeEach(function (done) {
+ numberOfReceivedUpdates = 0
+ DocUpdaterClient.setDocLines(
+ this.project_id,
+ this.doc_id,
+ this.newLines,
+ this.source,
+ this.user_id,
+ false,
+ (error, res, body) => {
+ if (error) {
+ return done(error)
+ }
+ this.statusCode = res.statusCode
+ this.body = body
+ done()
+ }
+ )
+ })
+
+ it('should not bump the version in doc updater', function (done) {
+ DocUpdaterClient.getDoc(
+ this.project_id,
+ this.doc_id,
+ (error, res, doc) => {
+ if (error) {
+ return done(error)
+ }
+ doc.version.should.equal(this.version + 2)
+ done()
+ }
+ )
+ })
+
+ it('should not emit any updates', function (done) {
+ setTimeout(() => {
+ expect(numberOfReceivedUpdates).to.equal(0)
+ done()
+ }, 100) // delay by 100ms: make sure we do not check too early!
+ })
+ })
+ })
+
describe('when the updated doc does not exist in the doc updater', function () {
before(function (done) {
this.project_id = DocUpdaterClient.randomId()
@@ -525,4 +686,285 @@ describe('Setting a document', function () {
})
})
})
+
+ describe('with track changes (history-ot)', function () {
+ const lines = ['one', 'one and a half', 'two', 'three']
+ const userId = DocUpdaterClient.randomId()
+ const ts = new Date().toISOString()
+ beforeEach(function (done) {
+ numberOfReceivedUpdates = 0
+ this.newLines = ['one', 'two', 'three']
+ this.project_id = DocUpdaterClient.randomId()
+ this.doc_id = DocUpdaterClient.randomId()
+ this.historyOTUpdate = {
+ doc: this.doc_id,
+ op: [
+ {
+ textOperation: [
+ 4,
+ {
+ r: 'one and a half\n'.length,
+ tracking: {
+ type: 'delete',
+ userId,
+ ts,
+ },
+ },
+ 9,
+ ],
+ },
+ ],
+ v: this.version,
+ meta: { source: 'random-publicId' },
+ }
+ MockWebApi.insertDoc(this.project_id, this.doc_id, {
+ lines,
+ version: this.version,
+ otMigrationStage: 1,
+ })
+ DocUpdaterClient.preloadDoc(this.project_id, this.doc_id, error => {
+ if (error) {
+ throw error
+ }
+ DocUpdaterClient.sendUpdate(
+ this.project_id,
+ this.doc_id,
+ this.historyOTUpdate,
+ error => {
+ if (error) {
+ throw error
+ }
+ DocUpdaterClient.waitForPendingUpdates(
+ this.project_id,
+ this.doc_id,
+ done
+ )
+ }
+ )
+ })
+ })
+
+ afterEach(function () {
+ MockProjectHistoryApi.flushProject.resetHistory()
+ MockWebApi.setDocument.resetHistory()
+ })
+ it('should record tracked changes', function (done) {
+ docUpdaterRedis.get(
+ Keys.docLines({ doc_id: this.doc_id }),
+ (error, data) => {
+ if (error) {
+ throw error
+ }
+ expect(JSON.parse(data)).to.deep.equal({
+ content: lines.join('\n'),
+ trackedChanges: [
+ {
+ range: {
+ pos: 4,
+ length: 15,
+ },
+ tracking: {
+ ts,
+ type: 'delete',
+ userId,
+ },
+ },
+ ],
+ })
+ done()
+ }
+ )
+ })
+
+ it('should apply the change', function (done) {
+ DocUpdaterClient.getDoc(
+ this.project_id,
+ this.doc_id,
+ (error, res, data) => {
+ if (error) {
+ throw error
+ }
+ expect(data.lines).to.deep.equal(this.newLines)
+ done()
+ }
+ )
+ })
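+
+ // Each case below resets the doc with setDocLines and checks how the tracked-delete range in Redis is shifted or preserved.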
+ const cases = [
+ {
+ name: 'when resetting the content',
+ lines,
+ want: {
+ content: 'one\none and a half\none and a half\ntwo\nthree',
+ trackedChanges: [
+ {
+ range: {
+ pos: 'one and a half\n'.length + 4,
+ length: 15,
+ },
+ tracking: {
+ ts,
+ type: 'delete',
+ userId,
+ },
+ },
+ ],
+ },
+ },
+ {
+ name: 'when adding content before a tracked delete',
+ lines: ['one', 'INSERT', 'two', 'three'],
+ want: {
+ content: 'one\nINSERT\none and a half\ntwo\nthree',
+ trackedChanges: [
+ {
+ range: {
+ pos: 'INSERT\n'.length + 4,
+ length: 15,
+ },
+ tracking: {
+ ts,
+ type: 'delete',
+ userId,
+ },
+ },
+ ],
+ },
+ },
+ {
+ name: 'when adding content after a tracked delete',
+ lines: ['one', 'two', 'INSERT', 'three'],
+ want: {
+ content: 'one\none and a half\ntwo\nINSERT\nthree',
+ trackedChanges: [
+ {
+ range: {
+ pos: 4,
+ length: 15,
+ },
+ tracking: {
+ ts,
+ type: 'delete',
+ userId,
+ },
+ },
+ ],
+ },
+ },
+ {
+ name: 'when deleting content before a tracked delete',
+ lines: ['two', 'three'],
+ want: {
+ content: 'one and a half\ntwo\nthree',
+ trackedChanges: [
+ {
+ range: {
+ pos: 0,
+ length: 15,
+ },
+ tracking: {
+ ts,
+ type: 'delete',
+ userId,
+ },
+ },
+ ],
+ },
+ },
+ {
+ name: 'when deleting content after a tracked delete',
+ lines: ['one', 'two'],
+ want: {
+ content: 'one\none and a half\ntwo',
+ trackedChanges: [
+ {
+ range: {
+ pos: 4,
+ length: 15,
+ },
+ tracking: {
+ ts,
+ type: 'delete',
+ userId,
+ },
+ },
+ ],
+ },
+ },
+ {
+ name: 'when deleting content immediately after a tracked delete',
+ lines: ['one', 'three'],
+ want: {
+ content: 'one\none and a half\nthree',
+ trackedChanges: [
+ {
+ range: {
+ pos: 4,
+ length: 15,
+ },
+ tracking: {
+ ts,
+ type: 'delete',
+ userId,
+ },
+ },
+ ],
+ },
+ },
+ {
+ name: 'when deleting content across a tracked delete',
+ lines: ['onethree'],
+ want: {
+ content: 'oneone and a half\nthree',
+ trackedChanges: [
+ {
+ range: {
+ pos: 3,
+ length: 15,
+ },
+ tracking: {
+ ts,
+ type: 'delete',
+ userId,
+ },
+ },
+ ],
+ },
+ },
+ ]
+
+ for (const { name, lines, want } of cases) {
+ describe(name, function () {
+ beforeEach(function (done) {
+ DocUpdaterClient.setDocLines(
+ this.project_id,
+ this.doc_id,
+ lines,
+ this.source,
+ userId,
+ false,
+ (error, res, body) => {
+ if (error) {
+ return done(error)
+ }
+ this.statusCode = res.statusCode
+ this.body = body
+ done()
+ }
+ )
+ })
+ it('should update accordingly', function (done) {
+ docUpdaterRedis.get(
+ Keys.docLines({ doc_id: this.doc_id }),
+ (error, data) => {
+ if (error) {
+ throw error
+ }
+ expect(JSON.parse(data)).to.deep.equal(want)
+ done()
+ }
+ )
+ })
+ })
+ }
+ })
})
diff --git a/services/document-updater/test/acceptance/js/helpers/DocUpdaterApp.js b/services/document-updater/test/acceptance/js/helpers/DocUpdaterApp.js
index 33c6882138..d34996ca7c 100644
--- a/services/document-updater/test/acceptance/js/helpers/DocUpdaterApp.js
+++ b/services/document-updater/test/acceptance/js/helpers/DocUpdaterApp.js
@@ -9,7 +9,6 @@
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
const app = require('../../../../app')
-require('@overleaf/logger').logger.level('fatal')
module.exports = {
running: false,
diff --git a/services/document-updater/test/acceptance/js/helpers/DocUpdaterClient.js b/services/document-updater/test/acceptance/js/helpers/DocUpdaterClient.js
index 4ed4f929de..0a4ec8922e 100644
--- a/services/document-updater/test/acceptance/js/helpers/DocUpdaterClient.js
+++ b/services/document-updater/test/acceptance/js/helpers/DocUpdaterClient.js
@@ -119,6 +119,18 @@ module.exports = DocUpdaterClient = {
)
},
+ getProjectLastUpdatedAt(projectId, callback) {
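+ // Parse the JSON body only on 2xx responses; otherwise the raw body is passed through untouched.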
+ request.get(
+ `http://127.0.0.1:3003/project/${projectId}/last_updated_at`,
+ (error, res, body) => {
+ if (body != null && res.statusCode >= 200 && res.statusCode < 300) {
+ body = JSON.parse(body)
+ }
+ callback(error, res, body)
+ }
+ )
+ },
+
preloadDoc(projectId, docId, callback) {
DocUpdaterClient.getDoc(projectId, docId, callback)
},
diff --git a/services/document-updater/test/setup.js b/services/document-updater/test/setup.js
index 1099724329..8ba17d922f 100644
--- a/services/document-updater/test/setup.js
+++ b/services/document-updater/test/setup.js
@@ -31,6 +31,7 @@ SandboxedModule.configure({
requires: {
'@overleaf/logger': stubs.logger,
'mongodb-legacy': require('mongodb-legacy'), // for ObjectId comparisons
+ 'overleaf-editor-core': require('overleaf-editor-core'), // does not play nice with sandbox
},
globals: { Buffer, JSON, Math, console, process },
sourceTransformers: {
diff --git a/services/document-updater/test/unit/js/DocumentManager/DocumentManagerTests.js b/services/document-updater/test/unit/js/DocumentManager/DocumentManagerTests.js
index 5dc3d1c88f..1816579103 100644
--- a/services/document-updater/test/unit/js/DocumentManager/DocumentManagerTests.js
+++ b/services/document-updater/test/unit/js/DocumentManager/DocumentManagerTests.js
@@ -49,6 +49,9 @@ describe('DocumentManager', function () {
applyUpdate: sinon.stub().resolves(),
},
}
+ this.HistoryOTUpdateManager = {
+ applyUpdate: sinon.stub().resolves(),
+ }
this.RangesManager = {
acceptChanges: sinon.stub(),
deleteComment: sinon.stub(),
@@ -66,6 +69,7 @@ describe('DocumentManager', function () {
'./Metrics': this.Metrics,
'./DiffCodec': this.DiffCodec,
'./UpdateManager': this.UpdateManager,
+ './HistoryOTUpdateManager': this.HistoryOTUpdateManager,
'./RangesManager': this.RangesManager,
'./Errors': Errors,
'@overleaf/settings': this.Settings,
@@ -222,6 +226,7 @@ describe('DocumentManager', function () {
ranges: this.ranges,
pathname: this.pathname,
projectHistoryId: this.projectHistoryId,
+ type: 'sharejs-text-ot',
})
this.RedisManager.promises.getPreviousDocOps.resolves(this.ops)
this.result = await this.DocumentManager.promises.getDocAndRecentOps(
@@ -251,6 +256,7 @@ describe('DocumentManager', function () {
ranges: this.ranges,
pathname: this.pathname,
projectHistoryId: this.projectHistoryId,
+ type: 'sharejs-text-ot',
})
})
})
@@ -263,6 +269,7 @@ describe('DocumentManager', function () {
ranges: this.ranges,
pathname: this.pathname,
projectHistoryId: this.projectHistoryId,
+ type: 'sharejs-text-ot',
})
this.RedisManager.promises.getPreviousDocOps.resolves(this.ops)
this.result = await this.DocumentManager.promises.getDocAndRecentOps(
@@ -290,6 +297,7 @@ describe('DocumentManager', function () {
ranges: this.ranges,
pathname: this.pathname,
projectHistoryId: this.projectHistoryId,
+ type: 'sharejs-text-ot',
})
})
})
@@ -333,6 +341,7 @@ describe('DocumentManager', function () {
unflushedTime: this.unflushedTime,
alreadyLoaded: true,
historyRangesSupport: this.historyRangesSupport,
+ type: 'sharejs-text-ot',
})
})
})
@@ -400,6 +409,7 @@ describe('DocumentManager', function () {
unflushedTime: null,
alreadyLoaded: false,
historyRangesSupport: this.historyRangesSupport,
+ type: 'sharejs-text-ot',
})
})
})
@@ -835,6 +845,77 @@ describe('DocumentManager', function () {
})
})
+ describe('getComment', function () {
+ beforeEach(function () {
+ this.ranges.comments = [
+ {
+ id: 'mock-comment-id-1',
+ },
+ {
+ id: 'mock-comment-id-2',
+ },
+ ]
+ this.DocumentManager.promises.getDoc = sinon.stub().resolves({
+ lines: this.lines,
+ version: this.version,
+ ranges: this.ranges,
+ })
+ })
+
+ describe('when comment exists', function () {
+ beforeEach(async function () {
+ await expect(
+ this.DocumentManager.promises.getComment(
+ this.project_id,
+ this.doc_id,
+ 'mock-comment-id-1'
+ )
+ ).to.eventually.deep.equal({
+ comment: { id: 'mock-comment-id-1' },
+ })
+ })
+
+ it("should get the document's current ranges", function () {
+ this.DocumentManager.promises.getDoc
+ .calledWith(this.project_id, this.doc_id)
+ .should.equal(true)
+ })
+ })
+
+ describe("when the comment doesn't exist", function () {
+ beforeEach(async function () {
+ await expect(
+ this.DocumentManager.promises.getComment(
+ this.project_id,
+ this.doc_id,
+ 'mock-comment-id-x'
+ )
+ ).to.be.rejectedWith(Errors.NotFoundError)
+ })
+
+ it("should get the document's current ranges", function () {
+ this.DocumentManager.promises.getDoc
+ .calledWith(this.project_id, this.doc_id)
+ .should.equal(true)
+ })
+ })
+
+ describe('when the doc is not found', function () {
+ beforeEach(async function () {
+ this.DocumentManager.promises.getDoc = sinon
+ .stub()
+ .resolves({ lines: null, version: null, ranges: null })
+ await expect(
+ this.DocumentManager.promises.getComment(
+ this.project_id,
+ this.doc_id,
+ 'mock-comment-id-1'
+ )
+ ).to.be.rejectedWith(Errors.NotFoundError)
+ })
+ })
+ })
+
describe('deleteComment', function () {
beforeEach(function () {
this.comment_id = 'mock-comment-id'
diff --git a/services/document-updater/test/unit/js/HistoryManager/HistoryManagerTests.js b/services/document-updater/test/unit/js/HistoryManager/HistoryManagerTests.js
index d0ac6cb9e0..2a5fb29b6d 100644
--- a/services/document-updater/test/unit/js/HistoryManager/HistoryManagerTests.js
+++ b/services/document-updater/test/unit/js/HistoryManager/HistoryManagerTests.js
@@ -14,6 +14,7 @@ describe('HistoryManager', function () {
requires: {
request: (this.request = {}),
'@overleaf/settings': (this.Settings = {
+ shortHistoryQueues: [],
apis: {
project_history: {
url: 'http://project_history.example.com',
@@ -118,7 +119,7 @@ describe('HistoryManager', function () {
beforeEach(function () {
this.HistoryManager.shouldFlushHistoryOps = sinon.stub()
this.HistoryManager.shouldFlushHistoryOps
- .withArgs(this.project_ops_length)
+ .withArgs(this.project_id, this.project_ops_length)
.returns(true)
this.HistoryManager.recordAndFlushHistoryOps(
@@ -139,7 +140,7 @@ describe('HistoryManager', function () {
beforeEach(function () {
this.HistoryManager.shouldFlushHistoryOps = sinon.stub()
this.HistoryManager.shouldFlushHistoryOps
- .withArgs(this.project_ops_length)
+ .withArgs(this.project_id, this.project_ops_length)
.returns(false)
this.HistoryManager.recordAndFlushHistoryOps(
@@ -157,6 +158,7 @@ describe('HistoryManager', function () {
describe('shouldFlushHistoryOps', function () {
it('should return false if the number of ops is not known', function () {
this.HistoryManager.shouldFlushHistoryOps(
+ this.project_id,
null,
['a', 'b', 'c'].length,
1
@@ -168,6 +170,7 @@ describe('HistoryManager', function () {
// Previously we were on 11 ops
// We didn't pass over a multiple of 5
this.HistoryManager.shouldFlushHistoryOps(
+ this.project_id,
14,
['a', 'b', 'c'].length,
5
@@ -178,6 +181,7 @@ describe('HistoryManager', function () {
// Previously we were on 12 ops
// We've reached a new multiple of 5
this.HistoryManager.shouldFlushHistoryOps(
+ this.project_id,
15,
['a', 'b', 'c'].length,
5
@@ -189,11 +193,22 @@ describe('HistoryManager', function () {
// Previously we were on 16 ops
// We didn't pass over a multiple of 5
this.HistoryManager.shouldFlushHistoryOps(
+ this.project_id,
17,
['a', 'b', 'c'].length,
5
).should.equal(true)
})
+
+ it('should return true if the project has a short queue', function () {
+ this.Settings.shortHistoryQueues = [this.project_id]
+ this.HistoryManager.shouldFlushHistoryOps(
+ this.project_id,
+ 14,
+ ['a', 'b', 'c'].length,
+ 5
+ ).should.equal(true)
+ })
})
})
@@ -217,34 +232,75 @@ describe('HistoryManager', function () {
.stub()
.yields()
this.DocumentManager.resyncDocContentsWithLock = sinon.stub().yields()
- this.HistoryManager.resyncProjectHistory(
- this.project_id,
- this.projectHistoryId,
- this.docs,
- this.files,
- this.callback
- )
})
- it('should queue a project structure reync', function () {
- this.ProjectHistoryRedisManager.queueResyncProjectStructure
- .calledWith(
+ describe('full sync', function () {
+ beforeEach(function () {
+ this.HistoryManager.resyncProjectHistory(
this.project_id,
this.projectHistoryId,
this.docs,
- this.files
+ this.files,
+ {},
+ this.callback
)
- .should.equal(true)
+ })
+
+ it('should queue a project structure resync', function () {
+ this.ProjectHistoryRedisManager.queueResyncProjectStructure
+ .calledWith(
+ this.project_id,
+ this.projectHistoryId,
+ this.docs,
+ this.files
+ )
+ .should.equal(true)
+ })
+
+ it('should queue doc content resyncs', function () {
+ this.DocumentManager.resyncDocContentsWithLock
+ .calledWith(this.project_id, this.docs[0].doc, this.docs[0].path)
+ .should.equal(true)
+ })
+
+ it('should call the callback', function () {
+ this.callback.called.should.equal(true)
+ })
})
- it('should queue doc content reyncs', function () {
- this.DocumentManager.resyncDocContentsWithLock
- .calledWith(this.project_id, this.docs[0].doc, this.docs[0].path)
- .should.equal(true)
- })
+ describe('resyncProjectStructureOnly=true', function () {
+ beforeEach(function () {
+ this.HistoryManager.resyncProjectHistory(
+ this.project_id,
+ this.projectHistoryId,
+ this.docs,
+ this.files,
+ { resyncProjectStructureOnly: true },
+ this.callback
+ )
+ })
- it('should call the callback', function () {
- this.callback.called.should.equal(true)
+ it('should queue a project structure resync', function () {
+ this.ProjectHistoryRedisManager.queueResyncProjectStructure
+ .calledWith(
+ this.project_id,
+ this.projectHistoryId,
+ this.docs,
+ this.files,
+ { resyncProjectStructureOnly: true }
+ )
+ .should.equal(true)
+ })
+
+ it('should not queue doc content resyncs', function () {
+ this.DocumentManager.resyncDocContentsWithLock.called.should.equal(
+ false
+ )
+ })
+
+ it('should call the callback', function () {
+ this.callback.called.should.equal(true)
+ })
})
})
})
diff --git a/services/document-updater/test/unit/js/HttpController/HttpControllerTests.js b/services/document-updater/test/unit/js/HttpController/HttpControllerTests.js
index d6aa03ab52..333da10d15 100644
--- a/services/document-updater/test/unit/js/HttpController/HttpControllerTests.js
+++ b/services/document-updater/test/unit/js/HttpController/HttpControllerTests.js
@@ -26,6 +26,7 @@ describe('HttpController', function () {
this.Metrics.Timer.prototype.done = sinon.stub()
this.project_id = 'project-id-123'
+ this.projectHistoryId = '123'
this.doc_id = 'doc-id-123'
this.source = 'editor'
this.next = sinon.stub()
@@ -65,7 +66,9 @@ describe('HttpController', function () {
this.version,
[],
this.ranges,
- this.pathname
+ this.pathname,
+ this.projectHistoryId,
+ 'sharejs-text-ot'
)
this.HttpController.getDoc(this.req, this.res, this.next)
})
@@ -77,17 +80,16 @@ describe('HttpController', function () {
})
it('should return the doc as JSON', function () {
- this.res.json
- .calledWith({
- id: this.doc_id,
- lines: this.lines,
- version: this.version,
- ops: [],
- ranges: this.ranges,
- pathname: this.pathname,
- ttlInS: 42,
- })
- .should.equal(true)
+ this.res.json.should.have.been.calledWith({
+ id: this.doc_id,
+ lines: this.lines,
+ version: this.version,
+ ops: [],
+ ranges: this.ranges,
+ pathname: this.pathname,
+ ttlInS: 42,
+ type: 'sharejs-text-ot',
+ })
})
it('should log the request', function () {
@@ -115,7 +117,9 @@ describe('HttpController', function () {
this.version,
this.ops,
this.ranges,
- this.pathname
+ this.pathname,
+ this.projectHistoryId,
+ 'sharejs-text-ot'
)
this.req.query = { fromVersion: `${this.fromVersion}` }
this.HttpController.getDoc(this.req, this.res, this.next)
@@ -128,17 +132,16 @@ describe('HttpController', function () {
})
it('should return the doc as JSON', function () {
- this.res.json
- .calledWith({
- id: this.doc_id,
- lines: this.lines,
- version: this.version,
- ops: this.ops,
- ranges: this.ranges,
- pathname: this.pathname,
- ttlInS: 42,
- })
- .should.equal(true)
+ this.res.json.should.have.been.calledWith({
+ id: this.doc_id,
+ lines: this.lines,
+ version: this.version,
+ ops: this.ops,
+ ranges: this.ranges,
+ pathname: this.pathname,
+ ttlInS: 42,
+ type: 'sharejs-text-ot',
+ })
})
it('should log the request', function () {
@@ -184,6 +187,65 @@ describe('HttpController', function () {
})
})
+ describe('getComment', function () {
+ beforeEach(function () {
+ this.ranges = {
+ changes: 'mock',
+ comments: [
+ {
+ id: 'comment-id-1',
+ },
+ {
+ id: 'comment-id-2',
+ },
+ ],
+ }
+ this.req = {
+ params: {
+ project_id: this.project_id,
+ doc_id: this.doc_id,
+ comment_id: this.comment_id,
+ },
+ query: {},
+ body: {},
+ }
+ })
+
+ beforeEach(function () {
+ this.DocumentManager.getCommentWithLock = sinon
+ .stub()
+ .callsArgWith(3, null, this.ranges.comments[0])
+ this.HttpController.getComment(this.req, this.res, this.next)
+ })
+
+ it('should get the comment', function () {
+ this.DocumentManager.getCommentWithLock
+ .calledWith(this.project_id, this.doc_id, this.comment_id)
+ .should.equal(true)
+ })
+
+ it('should return the comment as JSON', function () {
+ this.res.json
+ .calledWith({
+ id: 'comment-id-1',
+ })
+ .should.equal(true)
+ })
+
+ it('should log the request', function () {
+ this.logger.debug
+ .calledWith(
+ {
+ projectId: this.project_id,
+ docId: this.doc_id,
+ commentId: this.comment_id,
+ },
+ 'getting comment via http'
+ )
+ .should.equal(true)
+ })
+ })
+
describe('setDoc', function () {
beforeEach(function () {
this.lines = ['one', 'two', 'three']
diff --git a/services/document-updater/test/unit/js/Limits/LimitsTests.js b/services/document-updater/test/unit/js/Limits/LimitsTests.js
index 34a5c13c26..11ca38746a 100644
--- a/services/document-updater/test/unit/js/Limits/LimitsTests.js
+++ b/services/document-updater/test/unit/js/Limits/LimitsTests.js
@@ -81,4 +81,88 @@ describe('Limits', function () {
})
})
})
+
+ describe('stringFileDataContentIsTooLarge', function () {
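+ // The limit is checked against the content length minus any tracked-deleted text; tracked inserts still count towards the limit.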
+ it('should handle small docs', function () {
+ expect(
+ this.Limits.stringFileDataContentIsTooLarge({ content: '' }, 123)
+ ).to.equal(false)
+ })
+ it('should handle docs at the limit', function () {
+ expect(
+ this.Limits.stringFileDataContentIsTooLarge(
+ { content: 'x'.repeat(123) },
+ 123
+ )
+ ).to.equal(false)
+ })
+ it('should handle docs above the limit', function () {
+ expect(
+ this.Limits.stringFileDataContentIsTooLarge(
+ { content: 'x'.repeat(123 + 1) },
+ 123
+ )
+ ).to.equal(true)
+ })
+ it('should handle docs above the limit that drop below it once tracked deletes are removed', function () {
+ expect(
+ this.Limits.stringFileDataContentIsTooLarge(
+ {
+ content: 'x'.repeat(123 + 1),
+ trackedChanges: [
+ {
+ range: { pos: 1, length: 1 },
+ tracking: {
+ type: 'delete',
+ ts: '2025-06-16T14:31:44.910Z',
+ userId: 'user-id',
+ },
+ },
+ ],
+ },
+ 123
+ )
+ ).to.equal(false)
+ })
+ it('should handle docs that stay above the limit even after tracked deletes are removed', function () {
+ expect(
+ this.Limits.stringFileDataContentIsTooLarge(
+ {
+ content: 'x'.repeat(123 + 2),
+ trackedChanges: [
+ {
+ range: { pos: 1, length: 1 },
+ tracking: {
+ type: 'delete',
+ ts: '2025-06-16T14:31:44.910Z',
+ userId: 'user-id',
+ },
+ },
+ ],
+ },
+ 123
+ )
+ ).to.equal(true)
+ })
+ it('should handle docs above the limit when the only tracked changes are inserts', function () {
+ expect(
+ this.Limits.stringFileDataContentIsTooLarge(
+ {
+ content: 'x'.repeat(123 + 1),
+ trackedChanges: [
+ {
+ range: { pos: 1, length: 1 },
+ tracking: {
+ type: 'insert',
+ ts: '2025-06-16T14:31:44.910Z',
+ userId: 'user-id',
+ },
+ },
+ ],
+ },
+ 123
+ )
+ ).to.equal(true)
+ })
+ })
})
diff --git a/services/document-updater/test/unit/js/ProjectHistoryRedisManager/ProjectHistoryRedisManagerTests.js b/services/document-updater/test/unit/js/ProjectHistoryRedisManager/ProjectHistoryRedisManagerTests.js
index 760385b176..ad6c121dfb 100644
--- a/services/document-updater/test/unit/js/ProjectHistoryRedisManager/ProjectHistoryRedisManagerTests.js
+++ b/services/document-updater/test/unit/js/ProjectHistoryRedisManager/ProjectHistoryRedisManagerTests.js
@@ -15,6 +15,7 @@ describe('ProjectHistoryRedisManager', function () {
this.Limits = {
docIsTooLarge: sinon.stub().returns(false),
+ stringFileDataContentIsTooLarge: sinon.stub().returns(false),
}
this.ProjectHistoryRedisManager = SandboxedModule.require(modulePath, {
@@ -61,22 +62,18 @@ describe('ProjectHistoryRedisManager', function () {
})
it('should queue an update', function () {
- this.multi.rpush
- .calledWithExactly(
- `ProjectHistory:Ops:${this.project_id}`,
- this.ops[0],
- this.ops[1]
- )
- .should.equal(true)
+ this.multi.rpush.should.have.been.calledWithExactly(
+ `ProjectHistory:Ops:${this.project_id}`,
+ this.ops[0],
+ this.ops[1]
+ )
})
it('should set the queue timestamp if not present', function () {
- this.multi.setnx
- .calledWithExactly(
- `ProjectHistory:FirstOpTimestamp:${this.project_id}`,
- Date.now()
- )
- .should.equal(true)
+ this.multi.setnx.should.have.been.calledWithExactly(
+ `ProjectHistory:FirstOpTimestamp:${this.project_id}`,
+ Date.now()
+ )
})
})
@@ -118,9 +115,10 @@ describe('ProjectHistoryRedisManager', function () {
file: this.file_id,
}
- this.ProjectHistoryRedisManager.promises.queueOps
- .calledWithExactly(this.project_id, JSON.stringify(update))
- .should.equal(true)
+ this.ProjectHistoryRedisManager.promises.queueOps.should.have.been.calledWithExactly(
+ this.project_id,
+ JSON.stringify(update)
+ )
})
})
@@ -166,9 +164,10 @@ describe('ProjectHistoryRedisManager', function () {
doc: this.doc_id,
}
- this.ProjectHistoryRedisManager.promises.queueOps
- .calledWithExactly(this.project_id, JSON.stringify(update))
- .should.equal(true)
+ this.ProjectHistoryRedisManager.promises.queueOps.should.have.been.calledWithExactly(
+ this.project_id,
+ JSON.stringify(update)
+ )
})
it('should queue an update with file metadata', async function () {
@@ -350,9 +349,10 @@ describe('ProjectHistoryRedisManager', function () {
doc: this.doc_id,
}
- this.ProjectHistoryRedisManager.promises.queueOps
- .calledWithExactly(this.project_id, JSON.stringify(update))
- .should.equal(true)
+ this.ProjectHistoryRedisManager.promises.queueOps.should.have.been.calledWithExactly(
+ this.project_id,
+ JSON.stringify(update)
+ )
})
it('should not forward ranges if history ranges support is undefined', async function () {
@@ -402,9 +402,10 @@ describe('ProjectHistoryRedisManager', function () {
doc: this.doc_id,
}
- this.ProjectHistoryRedisManager.promises.queueOps
- .calledWithExactly(this.project_id, JSON.stringify(update))
- .should.equal(true)
+ this.ProjectHistoryRedisManager.promises.queueOps.should.have.been.calledWithExactly(
+ this.project_id,
+ JSON.stringify(update)
+ )
})
it('should pass "false" as the createdBlob field if not provided', async function () {
@@ -432,9 +433,10 @@ describe('ProjectHistoryRedisManager', function () {
doc: this.doc_id,
}
- this.ProjectHistoryRedisManager.promises.queueOps
- .calledWithExactly(this.project_id, JSON.stringify(update))
- .should.equal(true)
+ this.ProjectHistoryRedisManager.promises.queueOps.should.have.been.calledWithExactly(
+ this.project_id,
+ JSON.stringify(update)
+ )
})
it('should pass through the value of the createdBlob field', async function () {
@@ -463,9 +465,10 @@ describe('ProjectHistoryRedisManager', function () {
doc: this.doc_id,
}
- this.ProjectHistoryRedisManager.promises.queueOps
- .calledWithExactly(this.project_id, JSON.stringify(update))
- .should.equal(true)
+ this.ProjectHistoryRedisManager.promises.queueOps.should.have.been.calledWithExactly(
+ this.project_id,
+ JSON.stringify(update)
+ )
})
})
@@ -493,8 +496,8 @@ describe('ProjectHistoryRedisManager', function () {
beforeEach(async function () {
this.update = {
resyncDocContent: {
- content: 'one\ntwo',
version: this.version,
+ content: 'one\ntwo',
},
projectHistoryId: this.projectHistoryId,
path: this.pathname,
@@ -516,19 +519,18 @@ describe('ProjectHistoryRedisManager', function () {
})
it('should check if the doc is too large', function () {
- this.Limits.docIsTooLarge
- .calledWith(
- JSON.stringify(this.update).length,
- this.lines,
- this.settings.max_doc_length
- )
- .should.equal(true)
+ this.Limits.docIsTooLarge.should.have.been.calledWith(
+ JSON.stringify(this.update).length,
+ this.lines,
+ this.settings.max_doc_length
+ )
})
it('should queue an update', function () {
- this.ProjectHistoryRedisManager.promises.queueOps
- .calledWithExactly(this.project_id, JSON.stringify(this.update))
- .should.equal(true)
+ this.ProjectHistoryRedisManager.promises.queueOps.should.have.been.calledWithExactly(
+ this.project_id,
+ JSON.stringify(this.update)
+ )
})
})
@@ -551,9 +553,8 @@ describe('ProjectHistoryRedisManager', function () {
})
it('should not queue an update if the doc is too large', function () {
- this.ProjectHistoryRedisManager.promises.queueOps.called.should.equal(
- false
- )
+ this.ProjectHistoryRedisManager.promises.queueOps.should.not.have.been
+ .called
})
})
@@ -561,10 +562,10 @@ describe('ProjectHistoryRedisManager', function () {
beforeEach(async function () {
this.update = {
resyncDocContent: {
- content: 'onedeleted\ntwo',
version: this.version,
ranges: this.ranges,
resolvedCommentIds: this.resolvedCommentIds,
+ content: 'onedeleted\ntwo',
},
projectHistoryId: this.projectHistoryId,
path: this.pathname,
@@ -601,9 +602,76 @@ describe('ProjectHistoryRedisManager', function () {
})
it('should queue an update', function () {
- this.ProjectHistoryRedisManager.promises.queueOps
- .calledWithExactly(this.project_id, JSON.stringify(this.update))
- .should.equal(true)
+ this.ProjectHistoryRedisManager.promises.queueOps.should.have.been.calledWithExactly(
+ this.project_id,
+ JSON.stringify(this.update)
+ )
+ })
+ })
+
+ describe('history-ot', function () {
+ beforeEach(async function () {
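+ // In history-OT mode the doc "lines" are an object with content, comments and trackedChanges instead of an array of strings.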
+ this.lines = {
+ content: 'onedeleted\ntwo',
+ comments: [{ id: 'id1', ranges: [{ pos: 0, length: 3 }] }],
+ trackedChanges: [
+ {
+ range: { pos: 3, length: 7 },
+ tracking: {
+ type: 'delete',
+ userId: 'user-id',
+ ts: '2025-06-16T14:31:44.910Z',
+ },
+ },
+ ],
+ }
+ this.update = {
+ resyncDocContent: {
+ version: this.version,
+ historyOTRanges: {
+ comments: this.lines.comments,
+ trackedChanges: this.lines.trackedChanges,
+ },
+ content: this.lines.content,
+ },
+ projectHistoryId: this.projectHistoryId,
+ path: this.pathname,
+ doc: this.doc_id,
+ meta: { ts: new Date() },
+ }
+
+ await this.ProjectHistoryRedisManager.promises.queueResyncDocContent(
+ this.project_id,
+ this.projectHistoryId,
+ this.doc_id,
+ this.lines,
+ this.ranges,
+ this.resolvedCommentIds,
+ this.version,
+ this.pathname,
+ true
+ )
+ })
+
+ it('should include tracked deletes in the update', function () {
+ this.ProjectHistoryRedisManager.promises.queueOps.should.have.been.calledWithExactly(
+ this.project_id,
+ JSON.stringify(this.update)
+ )
+ })
+
+ it('should check the doc length without tracked deletes', function () {
+ this.Limits.stringFileDataContentIsTooLarge.should.have.been.calledWith(
+ this.lines,
+ this.settings.max_doc_length
+ )
+ })
+
+ it('should queue an update', function () {
+ this.ProjectHistoryRedisManager.promises.queueOps.should.have.been.calledWithExactly(
+ this.project_id,
+ JSON.stringify(this.update)
+ )
})
})
})
diff --git a/services/document-updater/test/unit/js/RangesManager/RangesManagerTests.js b/services/document-updater/test/unit/js/RangesManager/RangesManagerTests.js
index ec1c8703e9..4053aafb01 100644
--- a/services/document-updater/test/unit/js/RangesManager/RangesManagerTests.js
+++ b/services/document-updater/test/unit/js/RangesManager/RangesManagerTests.js
@@ -323,6 +323,44 @@ describe('RangesManager', function () {
})
})
+ describe('tracked delete rejections with multiple tracked deletes at the same position', function () {
+ beforeEach(function () {
+ // original text is "one [two ][three ][four ]five"
+ // [] denotes tracked deletes
+ this.ranges = {
+ changes: makeRanges([
+ { d: 'two ', p: 4 },
+ { d: 'three ', p: 4 },
+ { d: 'four ', p: 4 },
+ ]),
+ }
+ this.updates = makeUpdates([{ i: 'three ', p: 4, u: true }])
+ this.newDocLines = ['one three five']
+ this.result = this.RangesManager.applyUpdate(
+ this.project_id,
+ this.doc_id,
+ this.ranges,
+ this.updates,
+ this.newDocLines,
+ { historyRangesSupport: true }
+ )
+ })
+
+ it('should insert the text at the right history position', function () {
+ expect(this.result.historyUpdates.map(x => x.op)).to.deep.equal([
+ [
+ {
+ i: 'three ',
+ p: 4,
+ hpos: 8,
+ u: true,
+ trackedDeleteRejection: true,
+ },
+ ],
+ ])
+ })
+ })
+
describe('deletes over tracked changes', function () {
beforeEach(function () {
// original text is "on[1]e [22](three) f[333]ou[4444]r [55555]five"
diff --git a/services/document-updater/test/unit/js/UpdateManager/UpdateManagerTests.js b/services/document-updater/test/unit/js/UpdateManager/UpdateManagerTests.js
index 16ee0b12e1..912707e01d 100644
--- a/services/document-updater/test/unit/js/UpdateManager/UpdateManagerTests.js
+++ b/services/document-updater/test/unit/js/UpdateManager/UpdateManagerTests.js
@@ -1,5 +1,4 @@
-// @ts-check
-
+const { createHash } = require('node:crypto')
const sinon = require('sinon')
const { expect } = require('chai')
const SandboxedModule = require('sandboxed-module')
@@ -332,6 +331,7 @@ describe('UpdateManager', function () {
pathname: this.pathname,
projectHistoryId: this.projectHistoryId,
historyRangesSupport: false,
+ type: 'sharejs-text-ot',
})
this.RangesManager.applyUpdate.returns({
newRanges: this.updated_ranges,
@@ -399,7 +399,9 @@ describe('UpdateManager', function () {
this.historyUpdates,
this.pathname,
this.projectHistoryId,
- this.lines
+ this.lines,
+ this.ranges,
+ this.updatedDocLines
)
})
@@ -501,6 +503,7 @@ describe('UpdateManager', function () {
pathname: this.pathname,
projectHistoryId: this.projectHistoryId,
historyRangesSupport: true,
+ type: 'sharejs-text-ot',
})
await this.UpdateManager.promises.applyUpdate(
this.project_id,
@@ -526,6 +529,7 @@ describe('UpdateManager', function () {
describe('_adjustHistoryUpdatesMetadata', function () {
beforeEach(function () {
this.lines = ['some', 'test', 'data']
+ this.updatedDocLines = ['after', 'updates']
this.historyUpdates = [
{
v: 42,
@@ -570,6 +574,7 @@ describe('UpdateManager', function () {
this.pathname,
this.projectHistoryId,
this.lines,
+ this.updatedDocLines,
this.ranges,
false
)
@@ -632,6 +637,7 @@ describe('UpdateManager', function () {
this.projectHistoryId,
this.lines,
this.ranges,
+ this.updatedDocLines,
true
)
this.historyUpdates.should.deep.equal([
@@ -685,6 +691,7 @@ describe('UpdateManager', function () {
meta: {
pathname: this.pathname,
doc_length: 21, // 23 - 'so'
+ doc_hash: stringHash(this.updatedDocLines.join('\n')),
history_doc_length: 28, // 30 - 'so'
},
},
@@ -699,6 +706,7 @@ describe('UpdateManager', function () {
this.projectHistoryId,
[],
{},
+ ['foobar'],
false
)
this.historyUpdates.should.deep.equal([
@@ -822,3 +830,9 @@ describe('UpdateManager', function () {
})
})
})
+
+function stringHash(s) {
+ const hash = createHash('sha1')
+ hash.update(s)
+ return hash.digest('hex')
+}
diff --git a/services/document-updater/test/unit/js/UtilsTests.js b/services/document-updater/test/unit/js/UtilsTests.js
index 553b90159a..5d0f03ca64 100644
--- a/services/document-updater/test/unit/js/UtilsTests.js
+++ b/services/document-updater/test/unit/js/UtilsTests.js
@@ -1,5 +1,6 @@
// @ts-check
+const { createHash } = require('node:crypto')
const { expect } = require('chai')
const Utils = require('../../../app/js/Utils')
@@ -24,4 +25,30 @@ describe('Utils', function () {
expect(result).to.equal('the quick brown fox jumps over the lazy dog')
})
})
+
+ describe('computeDocHash', function () {
+ it('computes the hash for an empty doc', function () {
+ const actual = Utils.computeDocHash([])
+ const expected = stringHash('')
+ expect(actual).to.equal(expected)
+ })
+
+ it('computes the hash for a single-line doc', function () {
+ const actual = Utils.computeDocHash(['hello'])
+ const expected = stringHash('hello')
+ expect(actual).to.equal(expected)
+ })
+
+ it('computes the hash for a multiline doc', function () {
+ const actual = Utils.computeDocHash(['hello', 'there', 'world'])
+ const expected = stringHash('hello\nthere\nworld')
+ expect(actual).to.equal(expected)
+ })
+ })
})
+
+function stringHash(s) {
+ const hash = createHash('sha1')
+ hash.update(s)
+ return hash.digest('hex')
+}
diff --git a/services/filestore/.gitignore b/services/filestore/.gitignore
index a2f4b5afb2..1772191882 100644
--- a/services/filestore/.gitignore
+++ b/services/filestore/.gitignore
@@ -1,54 +1,3 @@
-compileFolder
-
-Compiled source #
-###################
-*.com
-*.class
-*.dll
-*.exe
-*.o
-*.so
-
-# Packages #
-############
-# it's better to unpack these files and commit the raw source
-# git has its own built in compression methods
-*.7z
-*.dmg
-*.gz
-*.iso
-*.jar
-*.rar
-*.tar
-*.zip
-
-# Logs and databases #
-######################
-*.log
-*.sql
-*.sqlite
-
-# OS generated files #
-######################
-.DS_Store?
-ehthumbs.db
-Icon?
-Thumbs.db
-
-/node_modules/*
-data/*/*
-
-**/*.map
-cookies.txt
uploads/*
-
user_files/*
template_files/*
-
-**.swp
-
-/log.json
-hash_folder
-
-# managed by dev-environment$ bin/update_build_scripts
-.npmrc
diff --git a/services/filestore/.nvmrc b/services/filestore/.nvmrc
index 2a393af592..fc37597bcc 100644
--- a/services/filestore/.nvmrc
+++ b/services/filestore/.nvmrc
@@ -1 +1 @@
-20.18.0
+22.17.0
diff --git a/services/filestore/Dockerfile b/services/filestore/Dockerfile
index f10a01d1b8..33de01c80f 100644
--- a/services/filestore/Dockerfile
+++ b/services/filestore/Dockerfile
@@ -2,7 +2,7 @@
# Instead run bin/update_build_scripts from
# https://github.com/overleaf/internal/
-FROM node:20.18.0 AS base
+FROM node:22.17.0 AS base
WORKDIR /overleaf/services/filestore
COPY services/filestore/install_deps.sh /overleaf/services/filestore/
diff --git a/services/filestore/Makefile b/services/filestore/Makefile
index 6cd5212487..69d7f85bf4 100644
--- a/services/filestore/Makefile
+++ b/services/filestore/Makefile
@@ -32,12 +32,30 @@ HERE=$(shell pwd)
MONOREPO=$(shell cd ../../ && pwd)
# Run the linting commands in the scope of the monorepo.
# Eslint and prettier (plus some configs) are on the root.
-RUN_LINTING = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(HERE) node:20.18.0 npm run --silent
+RUN_LINTING = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(HERE) node:22.17.0 npm run --silent
RUN_LINTING_CI = docker run --rm --volume $(MONOREPO)/.editorconfig:/overleaf/.editorconfig --volume $(MONOREPO)/.eslintignore:/overleaf/.eslintignore --volume $(MONOREPO)/.eslintrc:/overleaf/.eslintrc --volume $(MONOREPO)/.prettierignore:/overleaf/.prettierignore --volume $(MONOREPO)/.prettierrc:/overleaf/.prettierrc --volume $(MONOREPO)/tsconfig.backend.json:/overleaf/tsconfig.backend.json ci/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER) npm run --silent
# Same but from the top of the monorepo
-RUN_LINTING_MONOREPO = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(MONOREPO) node:20.18.0 npm run --silent
+RUN_LINTING_MONOREPO = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(MONOREPO) node:22.17.0 npm run --silent
+
+SHELLCHECK_OPTS = \
+ --shell=bash \
+ --external-sources
+SHELLCHECK_COLOR := $(if $(CI),--color=never,--color)
+SHELLCHECK_FILES := { git ls-files "*.sh" -z; git grep -Plz "\A\#\!.*bash"; } | sort -zu
+
+shellcheck:
+ @$(SHELLCHECK_FILES) | xargs -0 -r docker run --rm -v $(HERE):/mnt -w /mnt \
+ koalaman/shellcheck:stable $(SHELLCHECK_OPTS) $(SHELLCHECK_COLOR)
+
+shellcheck_fix:
+ @$(SHELLCHECK_FILES) | while IFS= read -r -d '' file; do \
+ diff=$$(docker run --rm -v $(HERE):/mnt -w /mnt koalaman/shellcheck:stable $(SHELLCHECK_OPTS) --format=diff "$$file" 2>/dev/null); \
+ if [ -n "$$diff" ] && ! echo "$$diff" | patch -p1 >/dev/null 2>&1; then echo "\033[31m$$file\033[0m"; \
+ elif [ -n "$$diff" ]; then echo "$$file"; \
+ else echo "\033[2m$$file\033[0m"; fi \
+ done
format:
$(RUN_LINTING) format
@@ -63,7 +81,7 @@ typecheck:
typecheck_ci:
$(RUN_LINTING_CI) types:check
-test: format lint typecheck test_unit test_acceptance
+test: format lint typecheck shellcheck test_unit test_acceptance
test_unit:
ifneq (,$(wildcard test/unit))
@@ -130,6 +148,7 @@ publish:
lint lint_fix \
build_types typecheck \
lint_ci format_ci typecheck_ci \
+ shellcheck shellcheck_fix \
test test_clean test_unit test_unit_clean \
test_acceptance test_acceptance_debug test_acceptance_pre_run \
test_acceptance_run test_acceptance_run_debug test_acceptance_clean \
diff --git a/services/filestore/app.js b/services/filestore/app.js
index 23d01f1ca3..178e8c7ff0 100644
--- a/services/filestore/app.js
+++ b/services/filestore/app.js
@@ -50,64 +50,73 @@ app.use((req, res, next) => {
Metrics.injectMetricsRoute(app)
-app.head(
- '/project/:project_id/file/:file_id',
- keyBuilder.userFileKeyMiddleware,
- fileController.getFileHead
-)
-app.get(
- '/project/:project_id/file/:file_id',
- keyBuilder.userFileKeyMiddleware,
- fileController.getFile
-)
-app.post(
- '/project/:project_id/file/:file_id',
- keyBuilder.userFileKeyMiddleware,
- fileController.insertFile
-)
-app.put(
- '/project/:project_id/file/:file_id',
- keyBuilder.userFileKeyMiddleware,
- bodyParser.json(),
- fileController.copyFile
-)
-app.delete(
- '/project/:project_id/file/:file_id',
- keyBuilder.userFileKeyMiddleware,
- fileController.deleteFile
-)
-app.delete(
- '/project/:project_id',
- keyBuilder.userProjectKeyMiddleware,
- fileController.deleteProject
-)
+if (settings.filestore.stores.user_files) {
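+ // User-file routes are only registered when a user_files store is configured.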
+ app.head(
+ '/project/:project_id/file/:file_id',
+ keyBuilder.userFileKeyMiddleware,
+ fileController.getFileHead
+ )
+ app.get(
+ '/project/:project_id/file/:file_id',
+ keyBuilder.userFileKeyMiddleware,
+ fileController.getFile
+ )
+ app.post(
+ '/project/:project_id/file/:file_id',
+ keyBuilder.userFileKeyMiddleware,
+ fileController.insertFile
+ )
+ app.put(
+ '/project/:project_id/file/:file_id',
+ keyBuilder.userFileKeyMiddleware,
+ bodyParser.json(),
+ fileController.copyFile
+ )
+ app.delete(
+ '/project/:project_id/file/:file_id',
+ keyBuilder.userFileKeyMiddleware,
+ fileController.deleteFile
+ )
+ app.delete(
+ '/project/:project_id',
+ keyBuilder.userProjectKeyMiddleware,
+ fileController.deleteProject
+ )
-app.get(
- '/project/:project_id/size',
- keyBuilder.userProjectKeyMiddleware,
- fileController.directorySize
-)
+ app.get(
+ '/project/:project_id/size',
+ keyBuilder.userProjectKeyMiddleware,
+ fileController.directorySize
+ )
+}
-app.head(
- '/template/:template_id/v/:version/:format',
- keyBuilder.templateFileKeyMiddleware,
- fileController.getFileHead
-)
-app.get(
- '/template/:template_id/v/:version/:format',
- keyBuilder.templateFileKeyMiddleware,
- fileController.getFile
-)
-app.get(
- '/template/:template_id/v/:version/:format/:sub_type',
- keyBuilder.templateFileKeyMiddleware,
- fileController.getFile
-)
-app.post(
- '/template/:template_id/v/:version/:format',
- keyBuilder.templateFileKeyMiddleware,
- fileController.insertFile
-)
+if (settings.filestore.stores.template_files) {
+ app.head(
+ '/template/:template_id/v/:version/:format',
+ keyBuilder.templateFileKeyMiddleware,
+ fileController.getFileHead
+ )
+ app.get(
+ '/template/:template_id/v/:version/:format',
+ keyBuilder.templateFileKeyMiddleware,
+ fileController.getFile
+ )
+ app.get(
+ '/template/:template_id/v/:version/:format/:sub_type',
+ keyBuilder.templateFileKeyMiddleware,
+ fileController.getFile
+ )
+ app.post(
+ '/template/:template_id/v/:version/:format',
+ keyBuilder.templateFileKeyMiddleware,
+ fileController.insertFile
+ )
+ app.delete(
+ '/template/:template_id/v/:version/:format',
+ keyBuilder.templateFileKeyMiddleware,
+ fileController.deleteFile
+ )
+}
app.get(
'/bucket/:bucket/key/*',
diff --git a/services/filestore/app/js/FileConverter.js b/services/filestore/app/js/FileConverter.js
index ac3dccec1f..bfc34314e9 100644
--- a/services/filestore/app/js/FileConverter.js
+++ b/services/filestore/app/js/FileConverter.js
@@ -5,7 +5,7 @@ const { callbackify } = require('node:util')
const safeExec = require('./SafeExec').promises
const { ConversionError } = require('./Errors')
-const APPROVED_FORMATS = ['png']
+const APPROVED_FORMATS = ['png', 'jpg']
const FOURTY_SECONDS = 40 * 1000
const KILL_SIGNAL = 'SIGTERM'
@@ -34,16 +34,14 @@ async function convert(sourcePath, requestedFormat) {
}
async function thumbnail(sourcePath) {
- const width = '260x'
- return await convert(sourcePath, 'png', [
+ const width = '548x'
+ return await _convert(sourcePath, 'jpg', [
'convert',
'-flatten',
'-background',
'white',
'-density',
'300',
- '-define',
- `pdf:fit-page=${width}`,
`${sourcePath}[0]`,
'-resize',
width,
@@ -51,16 +49,14 @@ async function thumbnail(sourcePath) {
}
async function preview(sourcePath) {
- const width = '548x'
- return await convert(sourcePath, 'png', [
+ const width = '794x'
+ return await _convert(sourcePath, 'jpg', [
'convert',
'-flatten',
'-background',
'white',
'-density',
'300',
- '-define',
- `pdf:fit-page=${width}`,
`${sourcePath}[0]`,
'-resize',
width,
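The thumbnail and preview conversions above now produce JPEG output at fixed widths (548px and 794px) and drop the pdf:fit-page define, relying on -resize alone. A minimal standalone sketch of an equivalent ImageMagick invocation, for illustration only: the real service runs the command through SafeExec with a timeout and lets _convert choose the destination path (not shown here); the output filename below is an assumption.

// Sketch: mirrors the argument list used by the new thumbnail()/preview()
// conversions, run directly via child_process instead of SafeExec.
const { execFile } = require('node:child_process')

function buildConvertArgs(sourcePath, outputPath, width) {
  return [
    '-flatten',
    '-background',
    'white',
    '-density',
    '300',
    `${sourcePath}[0]`, // first page of a PDF, or the image itself
    '-resize',
    width, // '548x' for thumbnails, '794x' for previews
    outputPath, // assumed: the .jpg extension selects JPEG output
  ]
}

execFile(
  'convert',
  buildConvertArgs('example.pdf', 'example-thumb.jpg', '548x'),
  err => {
    if (err) console.error('conversion failed', err)
    else console.log('wrote example-thumb.jpg')
  }
)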
diff --git a/services/filestore/app/js/FileHandler.js b/services/filestore/app/js/FileHandler.js
index 2ed28bd435..0c092c85cd 100644
--- a/services/filestore/app/js/FileHandler.js
+++ b/services/filestore/app/js/FileHandler.js
@@ -150,7 +150,9 @@ async function _getConvertedFileAndCache(bucket, key, convertedKey, opts) {
let convertedFsPath
try {
convertedFsPath = await _convertFile(bucket, key, opts)
- await ImageOptimiser.promises.compressPng(convertedFsPath)
+ if (convertedFsPath.toLowerCase().endsWith('.png')) {
+ await ImageOptimiser.promises.compressPng(convertedFsPath)
+ }
await PersistorManager.sendFile(bucket, convertedKey, convertedFsPath)
} catch (err) {
LocalFileWriter.deleteFile(convertedFsPath, () => {})
diff --git a/services/filestore/buildscript.txt b/services/filestore/buildscript.txt
index b95f601976..bd4d2116f6 100644
--- a/services/filestore/buildscript.txt
+++ b/services/filestore/buildscript.txt
@@ -5,8 +5,8 @@ filestore
--env-add=ENABLE_CONVERSIONS="true",USE_PROM_METRICS="true",AWS_S3_USER_FILES_STORAGE_CLASS=REDUCED_REDUNDANCY,AWS_S3_USER_FILES_BUCKET_NAME=fake-user-files,AWS_S3_USER_FILES_DEK_BUCKET_NAME=fake-user-files-dek,AWS_S3_TEMPLATE_FILES_BUCKET_NAME=fake-template-files,GCS_USER_FILES_BUCKET_NAME=fake-gcs-user-files,GCS_TEMPLATE_FILES_BUCKET_NAME=fake-gcs-template-files
--env-pass-through=
--esmock-loader=False
---node-version=20.18.0
+--node-version=22.17.0
--public-repo=True
---script-version=4.5.0
+--script-version=4.7.0
--test-acceptance-shards=SHARD_01_,SHARD_02_,SHARD_03_
--use-large-ci-runner=True
diff --git a/services/filestore/docker-compose.ci.yml b/services/filestore/docker-compose.ci.yml
index 1e3fb0d26f..fdf860b511 100644
--- a/services/filestore/docker-compose.ci.yml
+++ b/services/filestore/docker-compose.ci.yml
@@ -64,7 +64,7 @@ services:
command: tar -czf /tmp/build/build.tar.gz --exclude=build.tar.gz --exclude-vcs .
user: root
certs:
- image: node:20.18.0
+ image: node:22.17.0
volumes:
- ./test/acceptance/certs:/certs
working_dir: /certs
diff --git a/services/filestore/docker-compose.yml b/services/filestore/docker-compose.yml
index 287f32d6db..971d35b708 100644
--- a/services/filestore/docker-compose.yml
+++ b/services/filestore/docker-compose.yml
@@ -17,6 +17,7 @@ services:
working_dir: /overleaf/services/filestore
environment:
MOCHA_GREP: ${MOCHA_GREP}
+ LOG_LEVEL: ${LOG_LEVEL:-}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
command: npm run --silent test:unit
@@ -47,7 +48,7 @@ services:
GCS_PROJECT_ID: fake
STORAGE_EMULATOR_HOST: http://gcs:9090/storage/v1
MOCHA_GREP: ${MOCHA_GREP}
- LOG_LEVEL: ERROR
+ LOG_LEVEL: ${LOG_LEVEL:-}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
ENABLE_CONVERSIONS: "true"
@@ -71,7 +72,7 @@ services:
command: npm run --silent test:acceptance
certs:
- image: node:20.18.0
+ image: node:22.17.0
volumes:
- ./test/acceptance/certs:/certs
working_dir: /certs
diff --git a/services/filestore/package.json b/services/filestore/package.json
index 5eab300b4f..4b9043aed7 100644
--- a/services/filestore/package.json
+++ b/services/filestore/package.json
@@ -27,7 +27,7 @@
"@overleaf/stream-utils": "^0.1.0",
"body-parser": "^1.20.3",
"bunyan": "^1.8.15",
- "express": "^4.21.0",
+ "express": "^4.21.2",
"glob": "^7.1.6",
"lodash.once": "^4.1.1",
"node-fetch": "^2.7.0",
@@ -38,8 +38,8 @@
"@google-cloud/storage": "^6.10.1",
"chai": "^4.3.6",
"chai-as-promised": "^7.1.1",
- "mocha": "^10.2.0",
- "mongodb": "6.10.0",
+ "mocha": "^11.1.0",
+ "mongodb": "6.12.0",
"sandboxed-module": "2.0.4",
"sinon": "9.0.2",
"sinon-chai": "^3.7.0",
diff --git a/services/filestore/test/acceptance/deps/healthcheck.sh b/services/filestore/test/acceptance/deps/healthcheck.sh
index cd19cea637..675c205be6 100755
--- a/services/filestore/test/acceptance/deps/healthcheck.sh
+++ b/services/filestore/test/acceptance/deps/healthcheck.sh
@@ -1,9 +1,9 @@
#!/bin/sh
# health check to allow 404 status code as valid
-STATUSCODE=$(curl --silent --output /dev/null --write-out "%{http_code}" $1)
+STATUSCODE=$(curl --silent --output /dev/null --write-out "%{http_code}" "$1")
# will be 000 on non-http error (e.g. connection failure)
-if test $STATUSCODE -ge 500 || test $STATUSCODE -lt 200; then
+if test "$STATUSCODE" -ge 500 || test "$STATUSCODE" -lt 200; then
exit 1
fi
exit 0
diff --git a/services/filestore/test/acceptance/js/FilestoreApp.js b/services/filestore/test/acceptance/js/FilestoreApp.js
index afcebb4c95..61e9a29b7d 100644
--- a/services/filestore/test/acceptance/js/FilestoreApp.js
+++ b/services/filestore/test/acceptance/js/FilestoreApp.js
@@ -1,12 +1,9 @@
-const logger = require('@overleaf/logger')
const ObjectPersistor = require('@overleaf/object-persistor')
const Settings = require('@overleaf/settings')
const { promisify } = require('node:util')
const App = require('../../../app')
const FileHandler = require('../../../app/js/FileHandler')
-logger.logger.level('info')
-
class FilestoreApp {
async runServer() {
if (!this.server) {
diff --git a/services/filestore/test/acceptance/js/FilestoreTests.js b/services/filestore/test/acceptance/js/FilestoreTests.js
index 30c27fcea2..28f90d49b6 100644
--- a/services/filestore/test/acceptance/js/FilestoreTests.js
+++ b/services/filestore/test/acceptance/js/FilestoreTests.js
@@ -1305,11 +1305,39 @@ describe('Filestore', function () {
})
describe('deleteDirectory', function () {
- let checkGET2
+ let checkGET1, checkGET2
beforeEach('create files', async function () {
- await createRandomContent(fileUrl1, '1')
+ checkGET1 = await createRandomContent(fileUrl1, '1')
checkGET2 = await createRandomContent(fileUrl2, '2')
})
+ it('should refuse to delete top-level prefix', async function () {
+ await expect(
+ app.persistor.deleteDirectory(
+ Settings.filestore.stores.user_files,
+ projectId.slice(0, 3)
+ )
+ ).to.be.rejectedWith('not a project-folder')
+ expect(
+ await app.persistor.checkIfObjectExists(
+ Settings.filestore.stores.user_files,
+ fileKey1
+ )
+ ).to.equal(true)
+ await checkGET1()
+ expect(
+ await app.persistor.checkIfObjectExists(
+ Settings.filestore.stores.user_files,
+ fileKey2
+ )
+ ).to.equal(true)
+ expect(
+ await app.persistor.getDataEncryptionKeySize(
+ Settings.filestore.stores.user_files,
+ fileKey2
+ )
+ ).to.equal(32)
+ await checkGET2()
+ })
it('should delete sub-folder and keep DEK', async function () {
await app.persistor.deleteDirectory(
Settings.filestore.stores.user_files,
diff --git a/services/filestore/test/acceptance/js/TestConfig.js b/services/filestore/test/acceptance/js/TestConfig.js
index 7bd75ba781..3ad4ba423d 100644
--- a/services/filestore/test/acceptance/js/TestConfig.js
+++ b/services/filestore/test/acceptance/js/TestConfig.js
@@ -40,7 +40,9 @@ function s3SSECConfig() {
automaticallyRotateDEKEncryption: true,
dataEncryptionKeyBucketName: process.env.AWS_S3_USER_FILES_DEK_BUCKET_NAME,
pathToProjectFolder(_bucketName, path) {
- const [projectFolder] = path.match(/^[a-f0-9]+\//)
+ const match = path.match(/^[a-f0-9]{24}\//)
+ if (!match) throw new Error('not a project-folder')
+ const [projectFolder] = match
return projectFolder
},
async getRootKeyEncryptionKeys() {
diff --git a/services/git-bridge/.gitignore b/services/git-bridge/.gitignore
index 74a7f43d6e..f35e2ee038 100644
--- a/services/git-bridge/.gitignore
+++ b/services/git-bridge/.gitignore
@@ -1,53 +1,6 @@
-# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
-# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
-
-# Let's not share anything because we're using Maven.
-
-.idea
-*.iml
-
-# User-specific stuff:
-.idea/workspace.xml
-.idea/tasks.xml
-.idea/dictionaries
-.idea/vcs.xml
-.idea/jsLibraryMappings.xml
-
-# Sensitive or high-churn files:
-.idea/dataSources.ids
-.idea/dataSources.xml
-.idea/dataSources.local.xml
-.idea/sqlDataSources.xml
-.idea/dynamic.xml
-.idea/uiDesigner.xml
-
-# Gradle:
-.idea/gradle.xml
-.idea/libraries
-
-# Mongo Explorer plugin:
-.idea/mongoSettings.xml
-
-## File-based project format:
-*.iws
-
-## Plugin-specific files:
-
-# IntelliJ
+# Build output
/out/
target/
-# mpeltonen/sbt-idea plugin
-.idea_modules/
-
-# JIRA plugin
-atlassian-ide-plugin.xml
-
-# Crashlytics plugin (for Android Studio and IntelliJ)
-com_crashlytics_export_strings.xml
-crashlytics.properties
-crashlytics-build.properties
-fabric.properties
-
# Local configuration files
conf/runtime.json
diff --git a/services/git-bridge/Dockerfile b/services/git-bridge/Dockerfile
index 0d8b1e43e5..48579b9494 100644
--- a/services/git-bridge/Dockerfile
+++ b/services/git-bridge/Dockerfile
@@ -1,11 +1,17 @@
-# Dockerfile for git-bridge
+# Build the a8m/envsubst binary, as it supports default values,
+# which the GNU envsubst (from gettext-base) does not.
+FROM golang:1.24.3-alpine AS envsubst_builder
+
+WORKDIR /build
+
+RUN go install github.com/a8m/envsubst/cmd/envsubst@latest
FROM maven:3-amazoncorretto-21-debian AS base
RUN apt-get update && apt-get install -y make git sqlite3 \
&& rm -rf /var/lib/apt/lists
-COPY vendor/envsubst /opt/envsubst
+COPY --from=envsubst_builder /go/bin/envsubst /opt/envsubst
RUN chmod +x /opt/envsubst
RUN useradd --create-home node
@@ -29,16 +35,11 @@ RUN apk add --update --no-cache bash git sqlite procps htop net-tools jemalloc u
ENV LD_PRELOAD=/usr/lib/libjemalloc.so.2
-# Install Google Cloud Profiler agent
-RUN mkdir -p /opt/cprof && \
- wget -q -O- https://storage.googleapis.com/cloud-profiler/java/latest/profiler_java_agent.tar.gz \
- | tar xzv -C /opt/cprof
-
RUN adduser -D node
COPY --from=builder /git-bridge.jar /
-COPY vendor/envsubst /opt/envsubst
+COPY --from=envsubst_builder /go/bin/envsubst /opt/envsubst
RUN chmod +x /opt/envsubst
COPY conf/envsubst_template.json envsubst_template.json
diff --git a/services/git-bridge/README.md b/services/git-bridge/README.md
index 13b24cc6d0..eadc2abc4f 100644
--- a/services/git-bridge/README.md
+++ b/services/git-bridge/README.md
@@ -76,12 +76,10 @@ The configuration file is in `.json` format.
"postbackBaseUrl" (string): the postback url,
"serviceName" (string): current name of writeLaTeX
in case it ever changes,
- "oauth2" (object): { null or missing if oauth2 shouldn't be used
- "oauth2ClientID" (string): oauth2 client ID,
- "oauth2ClientSecret" (string): oauth2 client secret,
- "oauth2Server" (string): oauth2 server,
- with protocol and
- without trailing slash
+ "oauth2Server" (string): oauth2 server,
+ with protocol and
+ without trailing slash,
+ null or missing if oauth2 shouldn't be used
},
"repoStore" (object, optional): { configure the repo store
"maxFileSize" (long, optional): maximum size of a file, inclusive
diff --git a/services/git-bridge/conf/envsubst_template.json b/services/git-bridge/conf/envsubst_template.json
index 1f52ffbaef..4ede5bab7f 100644
--- a/services/git-bridge/conf/envsubst_template.json
+++ b/services/git-bridge/conf/envsubst_template.json
@@ -3,14 +3,11 @@
"bindIp": "${GIT_BRIDGE_BIND_IP:-0.0.0.0}",
"idleTimeout": ${GIT_BRIDGE_IDLE_TIMEOUT:-30000},
"rootGitDirectory": "${GIT_BRIDGE_ROOT_DIR:-/tmp/wlgb}",
+ "allowedCorsOrigins": "${GIT_BRIDGE_ALLOWED_CORS_ORIGINS:-https://localhost}",
"apiBaseUrl": "${GIT_BRIDGE_API_BASE_URL:-https://localhost/api/v0}",
"postbackBaseUrl": "${GIT_BRIDGE_POSTBACK_BASE_URL:-https://localhost}",
"serviceName": "${GIT_BRIDGE_SERVICE_NAME:-Overleaf}",
- "oauth2": {
- "oauth2ClientID": "${GIT_BRIDGE_OAUTH2_CLIENT_ID}",
- "oauth2ClientSecret": "${GIT_BRIDGE_OAUTH2_CLIENT_SECRET}",
- "oauth2Server": "${GIT_BRIDGE_OAUTH2_SERVER:-https://localhost}"
- },
+ "oauth2Server": "${GIT_BRIDGE_OAUTH2_SERVER:-https://localhost}",
"userPasswordEnabled": ${GIT_BRIDGE_USER_PASSWORD_ENABLED:-false},
"repoStore": {
"maxFileNum": ${GIT_BRIDGE_REPOSTORE_MAX_FILE_NUM:-2000},
diff --git a/services/git-bridge/conf/example_config.json b/services/git-bridge/conf/example_config.json
index bfad73f461..76b82eb6a0 100644
--- a/services/git-bridge/conf/example_config.json
+++ b/services/git-bridge/conf/example_config.json
@@ -3,14 +3,11 @@
"bindIp": "127.0.0.1",
"idleTimeout": 30000,
"rootGitDirectory": "/tmp/wlgb",
+ "allowedCorsOrigins": "https://localhost",
"apiBaseUrl": "https://localhost/api/v0",
"postbackBaseUrl": "https://localhost",
"serviceName": "Overleaf",
- "oauth2": {
- "oauth2ClientID": "asdf",
- "oauth2ClientSecret": "asdf",
- "oauth2Server": "https://localhost"
- },
+ "oauth2Server": "https://localhost",
"repoStore": {
"maxFileNum": 2000,
"maxFileSize": 52428800
diff --git a/services/git-bridge/conf/local.json b/services/git-bridge/conf/local.json
index 03ce4febe4..c4de48d819 100644
--- a/services/git-bridge/conf/local.json
+++ b/services/git-bridge/conf/local.json
@@ -3,14 +3,11 @@
"bindIp": "0.0.0.0",
"idleTimeout": 30000,
"rootGitDirectory": "/tmp/wlgb",
+ "allowedCorsOrigins": "http://v2.overleaf.test",
"apiBaseUrl": "http://v2.overleaf.test:3000/api/v0",
"postbackBaseUrl": "http://git-bridge:8000",
"serviceName": "Overleaf",
- "oauth2": {
- "oauth2ClientID": "264c723c925c13590880751f861f13084934030c13b4452901e73bdfab226edc",
- "oauth2ClientSecret": "e6b2e9eee7ae2bb653823250bb69594a91db0547fe3790a7135acb497108e62d",
- "oauth2Server": "http://v2.overleaf.test:3000"
- },
+ "oauth2Server": "http://v2.overleaf.test:3000",
"repoStore": {
"maxFileNum": 2000,
"maxFileSize": 52428800
diff --git a/services/git-bridge/pom.xml b/services/git-bridge/pom.xml
index 623f8f1229..3feb4dd860 100644
--- a/services/git-bridge/pom.xml
+++ b/services/git-bridge/pom.xml
@@ -16,24 +16,24 @@
2.23
4.13.2
2.8.4
- 9.4.56.v20240826
+ 9.4.57.v20241219
2.9.0
- 3.0.1
- 6.6.1.202309021850-r
+ 3.0.2
+ 6.10.1.202505221210-r
3.41.2.2
2.9.9
- 1.34.1
+ 1.37.0
1.23.0
- 3.12.0
- 1.2.3
+ 3.17.0
+ 1.2.13
5.12.0
5.12.0
- 1.11.274
+ 1.12.780
${jaxb.runtime.version}
2.3.2
4.5.14
- 2.10.0
- 1.24.0
+ 2.18.0
+ 1.27.1
0.10.0
1.70
@@ -206,7 +206,7 @@
<groupId>com.amazonaws</groupId>
- <artifactId>aws-java-sdk</artifactId>
+ <artifactId>aws-java-sdk-s3</artifactId>
<version>${aws.java.sdk.version}</version>
diff --git a/services/git-bridge/src/main/java/uk/ac/ic/wlgitbridge/application/config/Config.java b/services/git-bridge/src/main/java/uk/ac/ic/wlgitbridge/application/config/Config.java
index cf36916600..d5b530100e 100644
--- a/services/git-bridge/src/main/java/uk/ac/ic/wlgitbridge/application/config/Config.java
+++ b/services/git-bridge/src/main/java/uk/ac/ic/wlgitbridge/application/config/Config.java
@@ -26,10 +26,11 @@ public class Config implements JSONSource {
config.bindIp,
config.idleTimeout,
config.rootGitDirectory,
+ config.allowedCorsOrigins,
config.apiBaseURL,
config.postbackURL,
config.serviceName,
- Oauth2.asSanitised(config.oauth2),
+ config.oauth2Server,
config.userPasswordEnabled,
config.repoStore,
SwapStoreConfig.sanitisedCopy(config.swapStore),
@@ -41,10 +42,11 @@ public class Config implements JSONSource {
private String bindIp;
private int idleTimeout;
private String rootGitDirectory;
+ private String[] allowedCorsOrigins;
private String apiBaseURL;
private String postbackURL;
private String serviceName;
- @Nullable private Oauth2 oauth2;
+ @Nullable private String oauth2Server;
private boolean userPasswordEnabled;
@Nullable private RepoStoreConfig repoStore;
@Nullable private SwapStoreConfig swapStore;
@@ -64,10 +66,11 @@ public class Config implements JSONSource {
String bindIp,
int idleTimeout,
String rootGitDirectory,
+ String[] allowedCorsOrigins,
String apiBaseURL,
String postbackURL,
String serviceName,
- Oauth2 oauth2,
+ String oauth2Server,
boolean userPasswordEnabled,
RepoStoreConfig repoStore,
SwapStoreConfig swapStore,
@@ -77,10 +80,11 @@ public class Config implements JSONSource {
this.bindIp = bindIp;
this.idleTimeout = idleTimeout;
this.rootGitDirectory = rootGitDirectory;
+ this.allowedCorsOrigins = allowedCorsOrigins;
this.apiBaseURL = apiBaseURL;
this.postbackURL = postbackURL;
this.serviceName = serviceName;
- this.oauth2 = oauth2;
+ this.oauth2Server = oauth2Server;
this.userPasswordEnabled = userPasswordEnabled;
this.repoStore = repoStore;
this.swapStore = swapStore;
@@ -101,11 +105,18 @@ public class Config implements JSONSource {
}
this.apiBaseURL = apiBaseURL;
serviceName = getElement(configObject, "serviceName").getAsString();
+ final String rawAllowedCorsOrigins =
+ getOptionalString(configObject, "allowedCorsOrigins").trim();
+ if (rawAllowedCorsOrigins.isEmpty()) {
+ allowedCorsOrigins = new String[] {};
+ } else {
+ allowedCorsOrigins = rawAllowedCorsOrigins.split(",");
+ }
postbackURL = getElement(configObject, "postbackBaseUrl").getAsString();
if (!postbackURL.endsWith("/")) {
postbackURL += "/";
}
- oauth2 = new Gson().fromJson(configObject.get("oauth2"), Oauth2.class);
+ oauth2Server = getOptionalString(configObject, "oauth2Server");
userPasswordEnabled = getOptionalString(configObject, "userPasswordEnabled").equals("true");
repoStore = new Gson().fromJson(configObject.get("repoStore"), RepoStoreConfig.class);
swapStore = new Gson().fromJson(configObject.get("swapStore"), SwapStoreConfig.class);
@@ -139,6 +150,10 @@ public class Config implements JSONSource {
return this.sqliteHeapLimitBytes;
}
+ public String[] getAllowedCorsOrigins() {
+ return allowedCorsOrigins;
+ }
+
public String getAPIBaseURL() {
return apiBaseURL;
}
@@ -151,19 +166,12 @@ public class Config implements JSONSource {
return postbackURL;
}
- public boolean isUsingOauth2() {
- return oauth2 != null;
- }
-
public boolean isUserPasswordEnabled() {
return userPasswordEnabled;
}
- public Oauth2 getOauth2() {
- if (!isUsingOauth2()) {
- throw new AssertionError("Getting oauth2 when not using it");
- }
- return oauth2;
+ public String getOauth2Server() {
+ return oauth2Server;
}
public Optional<RepoStoreConfig> getRepoStore() {
diff --git a/services/git-bridge/src/main/java/uk/ac/ic/wlgitbridge/application/config/Oauth2.java b/services/git-bridge/src/main/java/uk/ac/ic/wlgitbridge/application/config/Oauth2.java
deleted file mode 100644
index 1db7d3b4d2..0000000000
--- a/services/git-bridge/src/main/java/uk/ac/ic/wlgitbridge/application/config/Oauth2.java
+++ /dev/null
@@ -1,33 +0,0 @@
-package uk.ac.ic.wlgitbridge.application.config;
-
-/*
- * Created by winston on 25/10/15.
- */
-public class Oauth2 {
-
- private final String oauth2ClientID;
- private final String oauth2ClientSecret;
- private final String oauth2Server;
-
- public Oauth2(String oauth2ClientID, String oauth2ClientSecret, String oauth2Server) {
- this.oauth2ClientID = oauth2ClientID;
- this.oauth2ClientSecret = oauth2ClientSecret;
- this.oauth2Server = oauth2Server;
- }
-
- public String getOauth2ClientID() {
- return oauth2ClientID;
- }
-
- public String getOauth2ClientSecret() {
- return oauth2ClientSecret;
- }
-
- public String getOauth2Server() {
- return oauth2Server;
- }
-
- public static Oauth2 asSanitised(Oauth2 oauth2) {
- return new Oauth2("", "", oauth2.oauth2Server);
- }
-}
diff --git a/services/git-bridge/src/main/java/uk/ac/ic/wlgitbridge/server/CORSHandler.java b/services/git-bridge/src/main/java/uk/ac/ic/wlgitbridge/server/CORSHandler.java
new file mode 100644
index 0000000000..10d978c352
--- /dev/null
+++ b/services/git-bridge/src/main/java/uk/ac/ic/wlgitbridge/server/CORSHandler.java
@@ -0,0 +1,47 @@
+package uk.ac.ic.wlgitbridge.server;
+
+import java.io.IOException;
+import java.util.Set;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+import org.eclipse.jetty.server.Request;
+import org.eclipse.jetty.server.handler.AbstractHandler;
+import uk.ac.ic.wlgitbridge.util.Log;
+
+public class CORSHandler extends AbstractHandler {
+ private final Set<String> allowedCorsOrigins;
+
+ public CORSHandler(String[] allowedCorsOrigins) {
+ this.allowedCorsOrigins = Set.of(allowedCorsOrigins);
+ }
+
+ @Override
+ public void handle(
+ String target, Request baseRequest, HttpServletRequest request, HttpServletResponse response)
+ throws IOException {
+
+ String origin = request.getHeader("Origin");
+ if (origin == null) {
+ return; // Not a CORS request
+ }
+
+ final boolean ok = allowedCorsOrigins.contains(origin);
+ if (ok) {
+ response.setHeader("Access-Control-Allow-Origin", origin);
+ response.setHeader("Access-Control-Allow-Credentials", "true");
+ response.setHeader("Access-Control-Allow-Methods", "GET, HEAD, PUT, POST, DELETE");
+ response.setHeader("Access-Control-Allow-Headers", "Authorization, Content-Type");
+ response.setHeader("Access-Control-Max-Age", "86400"); // cache for 24h
+ }
+ String method = baseRequest.getMethod();
+ if ("OPTIONS".equals(method)) {
+ Log.debug("OPTIONS <- {}", target);
+ baseRequest.setHandled(true);
+ if (ok) {
+ response.setStatus(200);
+ } else {
+ response.setStatus(403);
+ }
+ }
+ }
+}
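The handler above only echoes the Origin header back when it is in the configured allow-list, answers OPTIONS preflights itself (200 for allowed origins, 403 otherwise), and leaves non-OPTIONS requests to the downstream handlers. A quick way to exercise that behaviour against a locally running git-bridge, sketched in Node; the port (8000) and allowed origin (https://localhost) are assumptions based on the default config.

// Sketch: probe the CORS behaviour of a running git-bridge instance.
const http = require('node:http')

function probe(origin) {
  const req = http.request(
    {
      host: '127.0.0.1',
      port: 8000,
      path: '/status',
      method: 'OPTIONS',
      headers: { Origin: origin },
    },
    res => {
      console.log(
        origin,
        '->',
        res.statusCode,
        res.headers['access-control-allow-origin']
      )
      res.resume()
    }
  )
  req.on('error', console.error)
  req.end()
}

probe('https://localhost') // expected: 200 plus the echoed origin
probe('https://not-localhost') // expected: 403 and no CORS header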
diff --git a/services/git-bridge/src/main/java/uk/ac/ic/wlgitbridge/server/GitBridgeServer.java b/services/git-bridge/src/main/java/uk/ac/ic/wlgitbridge/server/GitBridgeServer.java
index 30c5039212..57d1b34a7b 100644
--- a/services/git-bridge/src/main/java/uk/ac/ic/wlgitbridge/server/GitBridgeServer.java
+++ b/services/git-bridge/src/main/java/uk/ac/ic/wlgitbridge/server/GitBridgeServer.java
@@ -110,6 +110,7 @@ public class GitBridgeServer {
this.jettyServer.addConnector(connector);
HandlerCollection handlers = new HandlerList();
+ handlers.addHandler(new CORSHandler(config.getAllowedCorsOrigins()));
handlers.addHandler(initApiHandler());
handlers.addHandler(initBaseHandler());
handlers.addHandler(initGitHandler(config, repoStore, snapshotApi));
@@ -150,9 +151,9 @@ public class GitBridgeServer {
throws ServletException {
final ServletContextHandler servletContextHandler =
new ServletContextHandler(ServletContextHandler.SESSIONS);
- if (config.isUsingOauth2()) {
+ if (config.getOauth2Server() != null) {
Filter filter =
- new Oauth2Filter(snapshotApi, config.getOauth2(), config.isUserPasswordEnabled());
+ new Oauth2Filter(snapshotApi, config.getOauth2Server(), config.isUserPasswordEnabled());
servletContextHandler.addFilter(
new FilterHolder(filter), "/*", EnumSet.of(DispatcherType.REQUEST));
}
diff --git a/services/git-bridge/src/main/java/uk/ac/ic/wlgitbridge/server/Oauth2Filter.java b/services/git-bridge/src/main/java/uk/ac/ic/wlgitbridge/server/Oauth2Filter.java
index 5bd3904e47..586a21ab3f 100644
--- a/services/git-bridge/src/main/java/uk/ac/ic/wlgitbridge/server/Oauth2Filter.java
+++ b/services/git-bridge/src/main/java/uk/ac/ic/wlgitbridge/server/Oauth2Filter.java
@@ -13,7 +13,6 @@ import javax.servlet.*;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.commons.codec.binary.Base64;
-import uk.ac.ic.wlgitbridge.application.config.Oauth2;
import uk.ac.ic.wlgitbridge.bridge.snapshot.SnapshotApi;
import uk.ac.ic.wlgitbridge.util.Instance;
import uk.ac.ic.wlgitbridge.util.Log;
@@ -28,13 +27,13 @@ public class Oauth2Filter implements Filter {
private final SnapshotApi snapshotApi;
- private final Oauth2 oauth2;
+ private final String oauth2Server;
private final boolean isUserPasswordEnabled;
- public Oauth2Filter(SnapshotApi snapshotApi, Oauth2 oauth2, boolean isUserPasswordEnabled) {
+ public Oauth2Filter(SnapshotApi snapshotApi, String oauth2Server, boolean isUserPasswordEnabled) {
this.snapshotApi = snapshotApi;
- this.oauth2 = oauth2;
+ this.oauth2Server = oauth2Server;
this.isUserPasswordEnabled = isUserPasswordEnabled;
}
@@ -108,7 +107,7 @@ public class Oauth2Filter implements Filter {
// fail later (for example, in the unlikely event that the token
// expired between the two requests). In that case, JGit will
// return a 401 without a custom error message.
- int statusCode = checkAccessToken(oauth2, password, getClientIp(request));
+ int statusCode = checkAccessToken(this.oauth2Server, password, getClientIp(request));
if (statusCode == 429) {
handleRateLimit(projectId, username, request, response);
return;
@@ -238,10 +237,9 @@ public class Oauth2Filter implements Filter {
"your Overleaf Account Settings."));
}
- private int checkAccessToken(Oauth2 oauth2, String accessToken, String clientIp)
+ private int checkAccessToken(String oauth2Server, String accessToken, String clientIp)
throws IOException {
- GenericUrl url =
- new GenericUrl(oauth2.getOauth2Server() + "/oauth/token/info?client_ip=" + clientIp);
+ GenericUrl url = new GenericUrl(oauth2Server + "/oauth/token/info?client_ip=" + clientIp);
HttpRequest request = Instance.httpRequestFactory.buildGetRequest(url);
HttpHeaders headers = new HttpHeaders();
headers.setAuthorization("Bearer " + accessToken);
diff --git a/services/git-bridge/src/main/java/uk/ac/ic/wlgitbridge/util/Tar.java b/services/git-bridge/src/main/java/uk/ac/ic/wlgitbridge/util/Tar.java
index 878adde27d..512babf9c7 100644
--- a/services/git-bridge/src/main/java/uk/ac/ic/wlgitbridge/util/Tar.java
+++ b/services/git-bridge/src/main/java/uk/ac/ic/wlgitbridge/util/Tar.java
@@ -5,6 +5,7 @@ import java.io.*;
import java.nio.file.Path;
import java.nio.file.Paths;
import org.apache.commons.compress.archivers.ArchiveEntry;
+import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
@@ -147,7 +148,7 @@ public class Tar {
throws IOException {
Preconditions.checkArgument(dir.isDirectory());
String name = base.relativize(Paths.get(dir.getAbsolutePath())).toString();
- ArchiveEntry entry = tout.createArchiveEntry(dir, name);
+ TarArchiveEntry entry = tout.createArchiveEntry(dir, name);
tout.putArchiveEntry(entry);
tout.closeArchiveEntry();
for (File f : dir.listFiles()) {
@@ -160,7 +161,7 @@ public class Tar {
Preconditions.checkArgument(file.isFile(), "given file" + " is not file: %s", file);
checkFileSize(file.length());
String name = base.relativize(Paths.get(file.getAbsolutePath())).toString();
- ArchiveEntry entry = tout.createArchiveEntry(file, name);
+ TarArchiveEntry entry = tout.createArchiveEntry(file, name);
tout.putArchiveEntry(entry);
try (InputStream in = new FileInputStream(file)) {
IOUtils.copy(in, tout);
diff --git a/services/git-bridge/src/test/java/uk/ac/ic/wlgitbridge/application/WLGitBridgeIntegrationTest.java b/services/git-bridge/src/test/java/uk/ac/ic/wlgitbridge/application/WLGitBridgeIntegrationTest.java
index e250798652..f706d98edf 100644
--- a/services/git-bridge/src/test/java/uk/ac/ic/wlgitbridge/application/WLGitBridgeIntegrationTest.java
+++ b/services/git-bridge/src/test/java/uk/ac/ic/wlgitbridge/application/WLGitBridgeIntegrationTest.java
@@ -465,8 +465,12 @@ public class WLGitBridgeIntegrationTest {
@After
public void tearDown() {
- server.stop();
- wlgb.stop();
+ if (server != null) {
+ server.stop();
+ }
+ if (wlgb != null) {
+ wlgb.stop();
+ }
}
private void gitConfig(File dir) throws IOException, InterruptedException {
@@ -1391,6 +1395,80 @@ public class WLGitBridgeIntegrationTest {
assertTrue(f.exists());
}
+ @Test
+ public void noCors() throws IOException, ExecutionException, InterruptedException {
+
+ int gitBridgePort = 33893;
+ int mockServerPort = 3893;
+
+ server = new MockSnapshotServer(mockServerPort, getResource("/canServePushedFiles").toFile());
+ server.start();
+ server.setState(states.get("canServePushedFiles").get("state"));
+
+ wlgb = new GitBridgeApp(new String[] {makeConfigFile(gitBridgePort, mockServerPort)});
+ wlgb.run();
+
+ String url = "http://127.0.0.1:" + gitBridgePort + "/status";
+ Response response = asyncHttpClient().prepareGet(url).execute().get();
+ assertEquals(200, response.getStatusCode());
+ assertEquals("ok\n", response.getResponseBody());
+ assertNull(response.getHeader("Access-Control-Allow-Origin"));
+ }
+
+ @Test
+ public void cors() throws IOException, ExecutionException, InterruptedException {
+
+ int gitBridgePort = 33894;
+ int mockServerPort = 3894;
+
+ server = new MockSnapshotServer(mockServerPort, getResource("/canServePushedFiles").toFile());
+ server.start();
+ server.setState(states.get("canServePushedFiles").get("state"));
+
+ wlgb = new GitBridgeApp(new String[] {makeConfigFile(gitBridgePort, mockServerPort)});
+ wlgb.run();
+
+ String url = "http://127.0.0.1:" + gitBridgePort + "/status";
+
+ // Success
+ Response response =
+ asyncHttpClient()
+ .prepareOptions(url)
+ .setHeader("Origin", "https://localhost")
+ .execute()
+ .get();
+ assertEquals(200, response.getStatusCode());
+ assertEquals("", response.getResponseBody());
+ assertEquals("https://localhost", response.getHeader("Access-Control-Allow-Origin"));
+
+ response =
+ asyncHttpClient().prepareGet(url).setHeader("Origin", "https://localhost").execute().get();
+ assertEquals(200, response.getStatusCode());
+ assertEquals("ok\n", response.getResponseBody());
+ assertEquals("https://localhost", response.getHeader("Access-Control-Allow-Origin"));
+
+ // Deny
+ response =
+ asyncHttpClient()
+ .prepareOptions(url)
+ .setHeader("Origin", "https://not-localhost")
+ .execute()
+ .get();
+ assertEquals(403, response.getStatusCode());
+ assertEquals("", response.getResponseBody());
+ assertNull(response.getHeader("Access-Control-Allow-Origin"));
+
+ response =
+ asyncHttpClient()
+ .prepareGet(url)
+ .setHeader("Origin", "https://not-localhost")
+ .execute()
+ .get();
+ assertEquals(200, response.getStatusCode());
+ assertEquals("ok\n", response.getResponseBody());
+ assertNull(response.getHeader("Access-Control-Allow-Origin"));
+ }
+
private String makeConfigFile(int port, int apiPort) throws IOException {
return makeConfigFile(port, apiPort, null);
}
@@ -1409,6 +1487,7 @@ public class WLGitBridgeIntegrationTest {
+ " \"rootGitDirectory\": \""
+ wlgb.getAbsolutePath()
+ "\",\n"
+ + " \"allowedCorsOrigins\": \"https://localhost\",\n"
+ " \"apiBaseUrl\": \"http://127.0.0.1:"
+ apiPort
+ "/api/v0\",\n"
@@ -1416,13 +1495,9 @@ public class WLGitBridgeIntegrationTest {
+ port
+ "\",\n"
+ " \"serviceName\": \"Overleaf\",\n"
- + " \"oauth2\": {\n"
- + " \"oauth2ClientID\": \"clientID\",\n"
- + " \"oauth2ClientSecret\": \"oauth2 client secret\",\n"
- + " \"oauth2Server\": \"http://127.0.0.1:"
+ + " \"oauth2Server\": \"http://127.0.0.1:"
+ apiPort
- + "\"\n"
- + " }";
+ + "\"";
if (swapCfg != null) {
cfgStr +=
",\n"
@@ -1445,7 +1520,6 @@ public class WLGitBridgeIntegrationTest {
+ ",\n"
+ " \"intervalMillis\": "
+ swapCfg.getIntervalMillis()
- + "\n"
+ " }\n";
}
cfgStr += "}\n";
diff --git a/services/git-bridge/src/test/java/uk/ac/ic/wlgitbridge/application/config/ConfigTest.java b/services/git-bridge/src/test/java/uk/ac/ic/wlgitbridge/application/config/ConfigTest.java
index ddafc621d6..8c102dbda3 100644
--- a/services/git-bridge/src/test/java/uk/ac/ic/wlgitbridge/application/config/ConfigTest.java
+++ b/services/git-bridge/src/test/java/uk/ac/ic/wlgitbridge/application/config/ConfigTest.java
@@ -23,11 +23,7 @@ public class ConfigTest {
+ " \"apiBaseUrl\": \"http://127.0.0.1:60000/api/v0\",\n"
+ " \"postbackBaseUrl\": \"http://127.0.0.1\",\n"
+ " \"serviceName\": \"Overleaf\",\n"
- + " \"oauth2\": {\n"
- + " \"oauth2ClientID\": \"clientID\",\n"
- + " \"oauth2ClientSecret\": \"oauth2 client secret\",\n"
- + " \"oauth2Server\": \"https://www.overleaf.com\"\n"
- + " }\n"
+ + " \"oauth2Server\": \"https://www.overleaf.com\"\n"
+ "}\n");
Config config = new Config(reader);
assertEquals(80, config.getPort());
@@ -35,10 +31,7 @@ public class ConfigTest {
assertEquals("http://127.0.0.1:60000/api/v0/", config.getAPIBaseURL());
assertEquals("http://127.0.0.1/", config.getPostbackURL());
assertEquals("Overleaf", config.getServiceName());
- assertTrue(config.isUsingOauth2());
- assertEquals("clientID", config.getOauth2().getOauth2ClientID());
- assertEquals("oauth2 client secret", config.getOauth2().getOauth2ClientSecret());
- assertEquals("https://www.overleaf.com", config.getOauth2().getOauth2Server());
+ assertEquals("https://www.overleaf.com", config.getOauth2Server());
}
@Test(expected = AssertionError.class)
@@ -53,7 +46,7 @@ public class ConfigTest {
+ " \"apiBaseUrl\": \"http://127.0.0.1:60000/api/v0\",\n"
+ " \"postbackBaseUrl\": \"http://127.0.0.1\",\n"
+ " \"serviceName\": \"Overleaf\",\n"
- + " \"oauth2\": null\n"
+ + " \"oauth2Server\": null\n"
+ "}\n");
Config config = new Config(reader);
assertEquals(80, config.getPort());
@@ -61,8 +54,7 @@ public class ConfigTest {
assertEquals("http://127.0.0.1:60000/api/v0/", config.getAPIBaseURL());
assertEquals("http://127.0.0.1/", config.getPostbackURL());
assertEquals("Overleaf", config.getServiceName());
- assertFalse(config.isUsingOauth2());
- config.getOauth2();
+ assertNull(config.getOauth2Server());
}
@Test
@@ -77,11 +69,7 @@ public class ConfigTest {
+ " \"apiBaseUrl\": \"http://127.0.0.1:60000/api/v0\",\n"
+ " \"postbackBaseUrl\": \"http://127.0.0.1\",\n"
+ " \"serviceName\": \"Overleaf\",\n"
- + " \"oauth2\": {\n"
- + " \"oauth2ClientID\": \"my oauth2 client id\",\n"
- + " \"oauth2ClientSecret\": \"my oauth2 client secret\",\n"
- + " \"oauth2Server\": \"https://www.overleaf.com\"\n"
- + " }\n"
+ + " \"oauth2Server\": \"https://www.overleaf.com\"\n"
+ "}\n");
Config config = new Config(reader);
String expected =
@@ -90,14 +78,11 @@ public class ConfigTest {
+ " \"bindIp\": \"127.0.0.1\",\n"
+ " \"idleTimeout\": 30000,\n"
+ " \"rootGitDirectory\": \"/var/wlgb/git\",\n"
+ + " \"allowedCorsOrigins\": [],\n"
+ " \"apiBaseURL\": \"http://127.0.0.1:60000/api/v0/\",\n"
+ " \"postbackURL\": \"http://127.0.0.1/\",\n"
+ " \"serviceName\": \"Overleaf\",\n"
- + " \"oauth2\": {\n"
- + " \"oauth2ClientID\": \"\",\n"
- + " \"oauth2ClientSecret\": \"\",\n"
- + " \"oauth2Server\": \"https://www.overleaf.com\"\n"
- + " },\n"
+ + " \"oauth2Server\": \"https://www.overleaf.com\",\n"
+ " \"userPasswordEnabled\": false,\n"
+ " \"repoStore\": null,\n"
+ " \"swapStore\": null,\n"
diff --git a/services/git-bridge/src/test/java/uk/ac/ic/wlgitbridge/bridge/BridgeTest.java b/services/git-bridge/src/test/java/uk/ac/ic/wlgitbridge/bridge/BridgeTest.java
index f749dea357..e27c3488c0 100644
--- a/services/git-bridge/src/test/java/uk/ac/ic/wlgitbridge/bridge/BridgeTest.java
+++ b/services/git-bridge/src/test/java/uk/ac/ic/wlgitbridge/bridge/BridgeTest.java
@@ -50,7 +50,7 @@ public class BridgeTest {
gcJob = mock(GcJob.class);
bridge =
new Bridge(
- new Config(0, "", 0, "", "", "", "", null, false, null, null, null, 0),
+ new Config(0, "", 0, "", null, "", "", "", null, false, null, null, null, 0),
lock,
repoStore,
dbStore,
diff --git a/services/git-bridge/vendor/envsubst b/services/git-bridge/vendor/envsubst
deleted file mode 100755
index f7ad8081d0..0000000000
Binary files a/services/git-bridge/vendor/envsubst and /dev/null differ
diff --git a/services/history-v1/.gitignore b/services/history-v1/.gitignore
deleted file mode 100644
index edb0f85350..0000000000
--- a/services/history-v1/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-
-# managed by monorepo$ bin/update_build_scripts
-.npmrc
diff --git a/services/history-v1/.nvmrc b/services/history-v1/.nvmrc
index 2a393af592..fc37597bcc 100644
--- a/services/history-v1/.nvmrc
+++ b/services/history-v1/.nvmrc
@@ -1 +1 @@
-20.18.0
+22.17.0
diff --git a/services/history-v1/Dockerfile b/services/history-v1/Dockerfile
index be43ce553c..322ab67ff8 100644
--- a/services/history-v1/Dockerfile
+++ b/services/history-v1/Dockerfile
@@ -2,7 +2,7 @@
# Instead run bin/update_build_scripts from
# https://github.com/overleaf/internal/
-FROM node:20.18.0 AS base
+FROM node:22.17.0 AS base
WORKDIR /overleaf/services/history-v1
COPY services/history-v1/install_deps.sh /overleaf/services/history-v1/
diff --git a/services/history-v1/Makefile b/services/history-v1/Makefile
index 81db72f7cd..7e62ba1812 100644
--- a/services/history-v1/Makefile
+++ b/services/history-v1/Makefile
@@ -32,12 +32,30 @@ HERE=$(shell pwd)
MONOREPO=$(shell cd ../../ && pwd)
# Run the linting commands in the scope of the monorepo.
# Eslint and prettier (plus some configs) are on the root.
-RUN_LINTING = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(HERE) node:20.18.0 npm run --silent
+RUN_LINTING = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(HERE) node:22.17.0 npm run --silent
RUN_LINTING_CI = docker run --rm --volume $(MONOREPO)/.editorconfig:/overleaf/.editorconfig --volume $(MONOREPO)/.eslintignore:/overleaf/.eslintignore --volume $(MONOREPO)/.eslintrc:/overleaf/.eslintrc --volume $(MONOREPO)/.prettierignore:/overleaf/.prettierignore --volume $(MONOREPO)/.prettierrc:/overleaf/.prettierrc --volume $(MONOREPO)/tsconfig.backend.json:/overleaf/tsconfig.backend.json ci/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER) npm run --silent
# Same but from the top of the monorepo
-RUN_LINTING_MONOREPO = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(MONOREPO) node:20.18.0 npm run --silent
+RUN_LINTING_MONOREPO = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(MONOREPO) node:22.17.0 npm run --silent
+
+SHELLCHECK_OPTS = \
+ --shell=bash \
+ --external-sources
+SHELLCHECK_COLOR := $(if $(CI),--color=never,--color)
+SHELLCHECK_FILES := { git ls-files "*.sh" -z; git grep -Plz "\A\#\!.*bash"; } | sort -zu
+
+shellcheck:
+ @$(SHELLCHECK_FILES) | xargs -0 -r docker run --rm -v $(HERE):/mnt -w /mnt \
+ koalaman/shellcheck:stable $(SHELLCHECK_OPTS) $(SHELLCHECK_COLOR)
+
+shellcheck_fix:
+ @$(SHELLCHECK_FILES) | while IFS= read -r -d '' file; do \
+ diff=$$(docker run --rm -v $(HERE):/mnt -w /mnt koalaman/shellcheck:stable $(SHELLCHECK_OPTS) --format=diff "$$file" 2>/dev/null); \
+ if [ -n "$$diff" ] && ! echo "$$diff" | patch -p1 >/dev/null 2>&1; then echo "\033[31m$$file\033[0m"; \
+ elif [ -n "$$diff" ]; then echo "$$file"; \
+ else echo "\033[2m$$file\033[0m"; fi \
+ done
format:
$(RUN_LINTING) format
@@ -63,7 +81,7 @@ typecheck:
typecheck_ci:
$(RUN_LINTING_CI) types:check
-test: format lint typecheck test_unit test_acceptance
+test: format lint typecheck shellcheck test_unit test_acceptance
test_unit:
ifneq (,$(wildcard test/unit))
@@ -98,13 +116,6 @@ test_acceptance_clean:
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) down -v -t 0
test_acceptance_pre_run:
- $(DOCKER_COMPOSE_TEST_ACCEPTANCE) up -d mongo
- $(DOCKER_COMPOSE_TEST_ACCEPTANCE) exec -T mongo sh -c ' \
- while ! mongosh --eval "db.version()" > /dev/null; do \
- echo "Waiting for Mongo..."; \
- sleep 1; \
- done; \
- mongosh --eval "rs.initiate({ _id: \"overleaf\", members: [ { _id: 0, host: \"mongo:27017\" } ] })"'
ifneq (,$(wildcard test/acceptance/js/scripts/pre-run))
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) run --rm test_acceptance test/acceptance/js/scripts/pre-run
endif
@@ -137,6 +148,7 @@ publish:
lint lint_fix \
build_types typecheck \
lint_ci format_ci typecheck_ci \
+ shellcheck shellcheck_fix \
test test_clean test_unit test_unit_clean \
test_acceptance test_acceptance_debug test_acceptance_pre_run \
test_acceptance_run test_acceptance_run_debug test_acceptance_clean \
diff --git a/services/history-v1/api/app/rollout.js b/services/history-v1/api/app/rollout.js
new file mode 100644
index 0000000000..24ca0409f8
--- /dev/null
+++ b/services/history-v1/api/app/rollout.js
@@ -0,0 +1,76 @@
+const crypto = require('node:crypto')
+
+class Rollout {
+ constructor(config) {
+ // The history buffer level is used to determine whether to queue changes
+ // in Redis or persist them directly to the chunk store.
+ // It defaults to 0 (no queuing) if not set.
+ this.historyBufferLevel = config.has('historyBufferLevel')
+ ? parseInt(config.get('historyBufferLevel'), 10)
+ : 0
+ // The forcePersistBuffer flag will ensure the buffer is fully persisted before
+ // any persist operation. Set this to true if you want to make the persisted-version
+ // in Redis match the endVersion of the latest chunk. This should be set to true
+ // when downgrading from a history buffer level that queues changes in Redis
+ // without persisting them immediately.
+ this.forcePersistBuffer = config.has('forcePersistBuffer')
+ ? config.get('forcePersistBuffer') === 'true'
+ : false
+
+ // Support gradual rollout of the next history buffer level
+ // with a percentage of projects using it.
+ this.nextHistoryBufferLevel = config.has('nextHistoryBufferLevel')
+ ? parseInt(config.get('nextHistoryBufferLevel'), 10)
+ : null
+ this.nextHistoryBufferLevelRolloutPercentage = config.has(
+ 'nextHistoryBufferLevelRolloutPercentage'
+ )
+ ? parseInt(config.get('nextHistoryBufferLevelRolloutPercentage'), 10)
+ : 0
+ }
+
+ report(logger) {
+ logger.info(
+ {
+ historyBufferLevel: this.historyBufferLevel,
+ forcePersistBuffer: this.forcePersistBuffer,
+ nextHistoryBufferLevel: this.nextHistoryBufferLevel,
+ nextHistoryBufferLevelRolloutPercentage:
+ this.nextHistoryBufferLevelRolloutPercentage,
+ },
+ this.historyBufferLevel > 0 || this.forcePersistBuffer
+ ? 'using history buffer'
+ : 'history buffer disabled'
+ )
+ }
+
+ /**
+ * Get the history buffer level for a project.
+ * @param {string} projectId
+ * @returns {Object} - An object containing the history buffer level and force persist buffer flag.
+ * @property {number} historyBufferLevel - The history buffer level to use for processing changes.
+ * @property {boolean} forcePersistBuffer - If true, forces the buffer to be persisted before any operation.
+ */
+ getHistoryBufferLevelOptions(projectId) {
+ if (
+ this.nextHistoryBufferLevel > this.historyBufferLevel &&
+ this.nextHistoryBufferLevelRolloutPercentage > 0
+ ) {
+ const hash = crypto.createHash('sha1').update(projectId).digest('hex')
+ const percentage = parseInt(hash.slice(0, 8), 16) % 100
+ // If the project is in the rollout percentage, we use the next history buffer level.
+ if (percentage < this.nextHistoryBufferLevelRolloutPercentage) {
+ return {
+ historyBufferLevel: this.nextHistoryBufferLevel,
+ forcePersistBuffer: this.forcePersistBuffer,
+ }
+ }
+ }
+ return {
+ historyBufferLevel: this.historyBufferLevel,
+ forcePersistBuffer: this.forcePersistBuffer,
+ }
+ }
+}
+
+module.exports = Rollout
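The gradual rollout works by hashing the project id and mapping the first 8 hex digits into a 0–99 bucket, so each project lands deterministically on either the current or the next history buffer level. A standalone sketch of that bucketing, using the same computation as getHistoryBufferLevelOptions; the 25% percentage and the project id below are hypothetical.

const crypto = require('node:crypto')

// Same bucketing as Rollout.getHistoryBufferLevelOptions: sha1(projectId),
// take the first 8 hex chars, reduce mod 100 to a stable 0-99 bucket.
function rolloutBucket(projectId) {
  const hash = crypto.createHash('sha1').update(projectId).digest('hex')
  return parseInt(hash.slice(0, 8), 16) % 100
}

const rolloutPercentage = 25 // hypothetical nextHistoryBufferLevelRolloutPercentage
const projectId = '507f1f77bcf86cd799439011' // hypothetical project id
console.log(
  projectId,
  rolloutBucket(projectId) < rolloutPercentage ? 'next level' : 'current level'
)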
diff --git a/services/history-v1/api/controllers/project_import.js b/services/history-v1/api/controllers/project_import.js
index ec4aa317b0..02fb793c87 100644
--- a/services/history-v1/api/controllers/project_import.js
+++ b/services/history-v1/api/controllers/project_import.js
@@ -1,6 +1,10 @@
+// @ts-check
+
'use strict'
-const BPromise = require('bluebird')
+const config = require('config')
+const { expressify } = require('@overleaf/promise-utils')
+
const HTTPStatus = require('http-status')
const core = require('overleaf-editor-core')
@@ -18,11 +22,18 @@ const BatchBlobStore = storage.BatchBlobStore
const BlobStore = storage.BlobStore
const chunkStore = storage.chunkStore
const HashCheckBlobStore = storage.HashCheckBlobStore
-const persistChanges = storage.persistChanges
+const commitChanges = storage.commitChanges
+const persistBuffer = storage.persistBuffer
+const InvalidChangeError = storage.InvalidChangeError
const render = require('./render')
+const Rollout = require('../app/rollout')
+const redisBackend = require('../../storage/lib/chunk_store/redis')
-exports.importSnapshot = function importSnapshot(req, res, next) {
+const rollout = new Rollout(config)
+rollout.report(logger) // display the rollout configuration in the logs
+
+async function importSnapshot(req, res) {
const projectId = req.swagger.params.project_id.value
const rawSnapshot = req.swagger.params.snapshot.value
@@ -31,24 +42,26 @@ exports.importSnapshot = function importSnapshot(req, res, next) {
try {
snapshot = Snapshot.fromRaw(rawSnapshot)
} catch (err) {
+ logger.warn({ err, projectId }, 'failed to import snapshot')
return render.unprocessableEntity(res)
}
- return chunkStore
- .initializeProject(projectId, snapshot)
- .then(function (projectId) {
- res.status(HTTPStatus.OK).json({ projectId })
- })
- .catch(err => {
- if (err instanceof chunkStore.AlreadyInitialized) {
- render.conflict(res)
- } else {
- next(err)
- }
- })
+ let historyId
+ try {
+ historyId = await chunkStore.initializeProject(projectId, snapshot)
+ } catch (err) {
+ if (err instanceof chunkStore.AlreadyInitialized) {
+ logger.warn({ err, projectId }, 'already initialized')
+ return render.conflict(res)
+ } else {
+ throw err
+ }
+ }
+
+ res.status(HTTPStatus.OK).json({ projectId: historyId })
}
-exports.importChanges = function importChanges(req, res, next) {
+async function importChanges(req, res, next) {
const projectId = req.swagger.params.project_id.value
const rawChanges = req.swagger.params.changes.value
const endVersion = req.swagger.params.end_version.value
@@ -59,7 +72,7 @@ exports.importChanges = function importChanges(req, res, next) {
try {
changes = rawChanges.map(Change.fromRaw)
} catch (err) {
- logger.error(err)
+ logger.warn({ err, projectId }, 'failed to parse changes')
return render.unprocessableEntity(res)
}
@@ -76,65 +89,102 @@ exports.importChanges = function importChanges(req, res, next) {
const batchBlobStore = new BatchBlobStore(blobStore)
const hashCheckBlobStore = new HashCheckBlobStore(blobStore)
- function loadFiles() {
+ async function loadFiles() {
const blobHashes = new Set()
- changes.forEach(function findBlobHashesToPreload(change) {
+ for (const change of changes) {
+ // This populates the set blobHashes with blobs referred to in the change
change.findBlobHashes(blobHashes)
- })
-
- function lazyLoadChangeFiles(change) {
- return change.loadFiles('lazy', batchBlobStore)
}
- return batchBlobStore
- .preload(Array.from(blobHashes))
- .then(function lazyLoadChangeFilesWithBatching() {
- return BPromise.each(changes, lazyLoadChangeFiles)
- })
+ await batchBlobStore.preload(Array.from(blobHashes))
+
+ for (const change of changes) {
+ await change.loadFiles('lazy', batchBlobStore)
+ }
}
- function buildResultSnapshot(resultChunk) {
- return BPromise.resolve(
- resultChunk || chunkStore.loadLatest(projectId)
- ).then(function (chunk) {
- const snapshot = chunk.getSnapshot()
- snapshot.applyAll(chunk.getChanges())
- return snapshot.store(hashCheckBlobStore)
- })
+ async function buildResultSnapshot(resultChunk) {
+ const chunk =
+ resultChunk ||
+ (await chunkStore.loadLatest(projectId, { persistedOnly: true }))
+ const snapshot = chunk.getSnapshot()
+ snapshot.applyAll(chunk.getChanges())
+ const rawSnapshot = await snapshot.store(hashCheckBlobStore)
+ return rawSnapshot
}
- return loadFiles()
- .then(function () {
- return persistChanges(projectId, changes, limits, endVersion)
+ await loadFiles()
+
+ let result
+ try {
+ const { historyBufferLevel, forcePersistBuffer } =
+ rollout.getHistoryBufferLevelOptions(projectId)
+ result = await commitChanges(projectId, changes, limits, endVersion, {
+ historyBufferLevel,
+ forcePersistBuffer,
})
- .then(function (result) {
- if (returnSnapshot === 'none') {
- res.status(HTTPStatus.CREATED).json({})
- } else {
- return buildResultSnapshot(result && result.currentChunk).then(
- function (rawSnapshot) {
- res.status(HTTPStatus.CREATED).json(rawSnapshot)
- }
- )
- }
- })
- .catch(err => {
- if (
- err instanceof Chunk.ConflictingEndVersion ||
- err instanceof TextOperation.UnprocessableError ||
- err instanceof File.NotEditableError ||
- err instanceof FileMap.PathnameError ||
- err instanceof Snapshot.EditMissingFileError ||
- err instanceof chunkStore.ChunkVersionConflictError
- ) {
- // If we failed to apply operations, that's probably because they were
- // invalid.
- logger.error(err)
- render.unprocessableEntity(res)
- } else if (err instanceof Chunk.NotFoundError) {
- render.notFound(res)
- } else {
- next(err)
- }
+ } catch (err) {
+ if (
+ err instanceof Chunk.ConflictingEndVersion ||
+ err instanceof TextOperation.UnprocessableError ||
+ err instanceof File.NotEditableError ||
+ err instanceof FileMap.PathnameError ||
+ err instanceof Snapshot.EditMissingFileError ||
+ err instanceof chunkStore.ChunkVersionConflictError ||
+ err instanceof InvalidChangeError
+ ) {
+ // If we failed to apply operations, that's probably because they were
+ // invalid.
+ logger.warn({ err, projectId, endVersion }, 'changes rejected by history')
+ return render.unprocessableEntity(res)
+ } else if (err instanceof Chunk.NotFoundError) {
+ logger.warn({ err, projectId }, 'chunk not found')
+ return render.notFound(res)
+ } else {
+ throw err
+ }
+ }
+
+ if (returnSnapshot === 'none') {
+ res.status(HTTPStatus.CREATED).json({
+ resyncNeeded: result.resyncNeeded,
})
+ } else {
+ const rawSnapshot = await buildResultSnapshot(result && result.currentChunk)
+ res.status(HTTPStatus.CREATED).json(rawSnapshot)
+ }
}
+
+async function flushChanges(req, res, next) {
+ const projectId = req.swagger.params.project_id.value
+ // Use the same limits as importChanges, since these are passed to persistChanges
+ const farFuture = new Date()
+ farFuture.setTime(farFuture.getTime() + 7 * 24 * 3600 * 1000)
+ const limits = {
+ maxChanges: 0,
+ minChangeTimestamp: farFuture,
+ maxChangeTimestamp: farFuture,
+ autoResync: true,
+ }
+ try {
+ await persistBuffer(projectId, limits)
+ res.status(HTTPStatus.OK).end()
+ } catch (err) {
+ if (err instanceof Chunk.NotFoundError) {
+ render.notFound(res)
+ } else {
+ throw err
+ }
+ }
+}
+
+async function expireProject(req, res, next) {
+ const projectId = req.swagger.params.project_id.value
+ await redisBackend.expireProject(projectId)
+ res.status(HTTPStatus.OK).end()
+}
+
+exports.importSnapshot = expressify(importSnapshot)
+exports.importChanges = expressify(importChanges)
+exports.flushChanges = expressify(flushChanges)
+exports.expireProject = expressify(expireProject)
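The controller now exports plain async functions wrapped in expressify from @overleaf/promise-utils, so thrown errors and rejected promises are routed to Express's error handling instead of each handler carrying its own .catch(next). A rough sketch of what such a wrapper does; the actual helper may differ in details.

// Approximate shape of an expressify-style wrapper: run the async handler and
// forward any rejection to next() so Express error middleware sees it.
function expressify(handler) {
  return (req, res, next) => {
    handler(req, res, next).catch(next)
  }
}

// Usage, mirroring the exports above:
// exports.flushChanges = expressify(flushChanges)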
diff --git a/services/history-v1/api/controllers/projects.js b/services/history-v1/api/controllers/projects.js
index 2478052a43..b7f07c4834 100644
--- a/services/history-v1/api/controllers/projects.js
+++ b/services/history-v1/api/controllers/projects.js
@@ -15,6 +15,7 @@ const {
BlobStore,
blobHash,
chunkStore,
+ redisBuffer,
HashCheckBlobStore,
ProjectArchive,
zipStore,
@@ -34,6 +35,7 @@ async function initializeProject(req, res, next) {
res.status(HTTPStatus.OK).json({ projectId })
} catch (err) {
if (err instanceof chunkStore.AlreadyInitialized) {
+ logger.warn({ err, projectId }, 'failed to initialize')
render.conflict(res)
} else {
throw err
@@ -86,6 +88,26 @@ async function getLatestHistory(req, res, next) {
}
}
+async function getLatestHistoryRaw(req, res, next) {
+ const projectId = req.swagger.params.project_id.value
+ const readOnly = req.swagger.params.readOnly.value
+ try {
+ const { startVersion, endVersion, endTimestamp } =
+ await chunkStore.getLatestChunkMetadata(projectId, { readOnly })
+ res.json({
+ startVersion,
+ endVersion,
+ endTimestamp,
+ })
+ } catch (err) {
+ if (err instanceof Chunk.NotFoundError) {
+ render.notFound(res)
+ } else {
+ throw err
+ }
+ }
+}
+
async function getHistory(req, res, next) {
const projectId = req.swagger.params.project_id.value
const version = req.swagger.params.version.value
@@ -118,6 +140,43 @@ async function getHistoryBefore(req, res, next) {
}
}
+/**
+ * Get all changes since the beginning of history or since a given version
+ */
+async function getChanges(req, res, next) {
+ const projectId = req.swagger.params.project_id.value
+ const since = req.swagger.params.since.value ?? 0
+
+ if (since < 0) {
+ // Negative values would cause an infinite loop
+ return res.status(400).json({
+ error: `Version out of bounds: ${since}`,
+ })
+ }
+
+ let chunk
+ try {
+ chunk = await chunkStore.loadAtVersion(projectId, since, {
+ preferNewer: true,
+ })
+ } catch (err) {
+ if (err instanceof Chunk.VersionNotFoundError) {
+ return res.status(400).json({
+ error: `Version out of bounds: ${since}`,
+ })
+ }
+ throw err
+ }
+
+ const latestChunkMetadata = await chunkStore.getLatestChunkMetadata(projectId)
+
+ // Extract the relevant changes from the chunk that contains the start version
+ const changes = chunk.getChanges().slice(since - chunk.getStartVersion())
+ const hasMore = latestChunkMetadata.endVersion > chunk.getEndVersion()
+
+ res.json({ changes: changes.map(change => change.toRaw()), hasMore })
+}
+
async function getZip(req, res, next) {
const projectId = req.swagger.params.project_id.value
const version = req.swagger.params.version.value
@@ -168,7 +227,9 @@ async function createZip(req, res, next) {
async function deleteProject(req, res, next) {
const projectId = req.swagger.params.project_id.value
const blobStore = new BlobStore(projectId)
+
await Promise.all([
+ redisBuffer.hardDeleteProject(projectId),
chunkStore.deleteProjectChunks(projectId),
blobStore.deleteBlobs(),
])
@@ -185,22 +246,28 @@ async function createProjectBlob(req, res, next) {
const sizeLimit = new StreamSizeLimit(maxUploadSize)
await pipeline(req, sizeLimit, fs.createWriteStream(tmpPath))
if (sizeLimit.sizeLimitExceeded) {
+ logger.warn(
+ { projectId, expectedHash, maxUploadSize },
+ 'blob exceeds size threshold'
+ )
return render.requestEntityTooLarge(res)
}
const hash = await blobHash.fromFile(tmpPath)
if (hash !== expectedHash) {
- logger.debug({ hash, expectedHash }, 'Hash mismatch')
+ logger.warn({ projectId, hash, expectedHash }, 'Hash mismatch')
return render.conflict(res, 'File hash mismatch')
}
const blobStore = new BlobStore(projectId)
const newBlob = await blobStore.putFile(tmpPath)
- try {
- const { backupBlob } = await import('../../storage/lib/backupBlob.mjs')
- await backupBlob(projectId, newBlob, tmpPath)
- } catch (error) {
- logger.warn({ error, projectId, hash }, 'Failed to backup blob')
+ if (config.has('backupStore')) {
+ try {
+ const { backupBlob } = await import('../../storage/lib/backupBlob.mjs')
+ await backupBlob(projectId, newBlob, tmpPath)
+ } catch (error) {
+ logger.warn({ error, projectId, hash }, 'Failed to backup blob')
+ }
}
res.status(HTTPStatus.CREATED).end()
})
@@ -284,6 +351,10 @@ async function copyProjectBlob(req, res, next) {
targetBlobStore.getBlob(blobHash),
])
if (!sourceBlob) {
+ logger.warn(
+ { sourceProjectId, targetProjectId, blobHash },
+ 'missing source blob when copying across projects'
+ )
return render.notFound(res)
}
// Exit early if the blob exists in the target project.
@@ -314,8 +385,10 @@ module.exports = {
getLatestHashedContent: expressify(getLatestHashedContent),
getLatestPersistedHistory: expressify(getLatestHistory),
getLatestHistory: expressify(getLatestHistory),
+ getLatestHistoryRaw: expressify(getLatestHistoryRaw),
getHistory: expressify(getHistory),
getHistoryBefore: expressify(getHistoryBefore),
+ getChanges: expressify(getChanges),
getZip: expressify(getZip),
createZip: expressify(createZip),
deleteProject: expressify(deleteProject),
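
A quick sketch of how a consumer might page through the new GET /projects/:id/changes endpoint added above. The controller responds with { changes, hasMore }, so a client can advance `since` by the number of changes it received; the base URL, API prefix and basic-auth credentials below are assumptions, not part of this patch.

const HISTORY_V1_URL = process.env.HISTORY_V1_URL || 'http://localhost:3100/api' // assumed base URL
const AUTH =
  'Basic ' +
  Buffer.from(
    `${process.env.HISTORY_USER}:${process.env.HISTORY_PASSWORD}` // assumed credentials
  ).toString('base64')

async function fetchAllChanges(projectId) {
  const allChanges = []
  let since = 0
  let hasMore = true
  while (hasMore) {
    const res = await fetch(
      `${HISTORY_V1_URL}/projects/${projectId}/changes?since=${since}`,
      { headers: { Authorization: AUTH } }
    )
    if (!res.ok) throw new Error(`getChanges failed with status ${res.status}`)
    const body = await res.json()
    allChanges.push(...body.changes)
    since += body.changes.length
    // stop when the server reports no more chunks, or when nothing new arrived
    hasMore = body.hasMore && body.changes.length > 0
  }
  return allChanges
}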
diff --git a/services/history-v1/api/controllers/with_tmp_dir.js b/services/history-v1/api/controllers/with_tmp_dir.js
index a9c2e9fb47..2e0737ba69 100644
--- a/services/history-v1/api/controllers/with_tmp_dir.js
+++ b/services/history-v1/api/controllers/with_tmp_dir.js
@@ -9,7 +9,7 @@ const path = require('node:path')
* after.
*
* @param {string} prefix - prefix for the temporary directory name
- * @param {Function} fn - async function to call
+ * @param {(tmpDir: string) => Promise<void>} fn - async function to call
*/
async function withTmpDir(prefix, fn) {
const tmpDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), prefix))
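
For context, a minimal usage sketch matching the updated JSDoc type: the callback receives the temporary directory path, and the directory is cleaned up once the callback settles.

const withTmpDir = require('./with_tmp_dir')

async function example() {
  await withTmpDir('example-', async tmpDir => {
    // write scratch files under tmpDir here; the directory is removed afterwards
    console.log('working in', tmpDir)
  })
}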
diff --git a/services/history-v1/api/swagger/index.js b/services/history-v1/api/swagger/index.js
index edfb68f4e4..3702c6ec07 100644
--- a/services/history-v1/api/swagger/index.js
+++ b/services/history-v1/api/swagger/index.js
@@ -84,6 +84,19 @@ module.exports = {
},
},
},
+ ChunkResponseRaw: {
+ properties: {
+ startVersion: {
+ type: 'number',
+ },
+ endVersion: {
+ type: 'number',
+ },
+ endTimestamp: {
+ type: 'string',
+ },
+ },
+ },
History: {
properties: {
snapshot: {
diff --git a/services/history-v1/api/swagger/project_import.js b/services/history-v1/api/swagger/project_import.js
index 60eb47fce4..043dc70667 100644
--- a/services/history-v1/api/swagger/project_import.js
+++ b/services/history-v1/api/swagger/project_import.js
@@ -100,9 +100,120 @@ const importChanges = {
],
}
+const getChanges = {
+ 'x-swagger-router-controller': 'projects',
+ operationId: 'getChanges',
+ tags: ['Project'],
+ description: 'Get changes applied to a project',
+ parameters: [
+ {
+ name: 'project_id',
+ in: 'path',
+ description: 'project id',
+ required: true,
+ type: 'string',
+ },
+ {
+ name: 'since',
+ in: 'query',
+ description: 'start version',
+ required: false,
+ type: 'number',
+ },
+ ],
+ responses: {
+ 200: {
+ description: 'Success',
+ schema: {
+ type: 'array',
+ items: {
+ $ref: '#/definitions/Change',
+ },
+ },
+ },
+ },
+ security: [
+ {
+ basic: [],
+ },
+ ],
+}
+
+const flushChanges = {
+ 'x-swagger-router-controller': 'project_import',
+ operationId: 'flushChanges',
+ tags: ['ProjectImport'],
+ description: 'Flush project changes from buffer to the chunk store.',
+ parameters: [
+ {
+ name: 'project_id',
+ in: 'path',
+ description: 'project id',
+ required: true,
+ type: 'string',
+ },
+ ],
+ responses: {
+ 200: {
+ description: 'Success',
+ schema: {
+ $ref: '#/definitions/Project',
+ },
+ },
+ 404: {
+ description: 'Not Found',
+ schema: {
+ $ref: '#/definitions/Error',
+ },
+ },
+ },
+ security: [
+ {
+ basic: [],
+ },
+ ],
+}
+
+const expireProject = {
+ 'x-swagger-router-controller': 'project_import',
+ operationId: 'expireProject',
+ tags: ['ProjectImport'],
+ description: 'Expire project changes from buffer.',
+ parameters: [
+ {
+ name: 'project_id',
+ in: 'path',
+ description: 'project id',
+ required: true,
+ type: 'string',
+ },
+ ],
+ responses: {
+ 200: {
+ description: 'Success',
+ schema: {
+ $ref: '#/definitions/Project',
+ },
+ },
+ 404: {
+ description: 'Not Found',
+ schema: {
+ $ref: '#/definitions/Error',
+ },
+ },
+ },
+ security: [
+ {
+ basic: [],
+ },
+ ],
+}
+
exports.paths = {
'/projects/{project_id}/import': { post: importSnapshot },
'/projects/{project_id}/legacy_import': { post: importSnapshot },
- '/projects/{project_id}/changes': { post: importChanges },
+ '/projects/{project_id}/changes': { get: getChanges, post: importChanges },
'/projects/{project_id}/legacy_changes': { post: importChanges },
+ '/projects/{project_id}/flush': { post: flushChanges },
+ '/projects/{project_id}/expire': { post: expireProject },
}
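
A hedged sketch of calling the two new buffer-lifecycle routes registered above. Both are plain POSTs with no request body; the base URL and credentials are assumptions.

const HISTORY_V1_URL = process.env.HISTORY_V1_URL || 'http://localhost:3100/api' // assumed
const AUTH =
  'Basic ' +
  Buffer.from(
    `${process.env.HISTORY_USER}:${process.env.HISTORY_PASSWORD}` // assumed
  ).toString('base64')

async function flushThenExpire(projectId) {
  for (const action of ['flush', 'expire']) {
    const res = await fetch(`${HISTORY_V1_URL}/projects/${projectId}/${action}`, {
      method: 'POST',
      headers: { Authorization: AUTH },
    })
    if (res.status === 404) throw new Error(`project ${projectId} not found`)
    if (!res.ok) throw new Error(`${action} failed with status ${res.status}`)
  }
}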
diff --git a/services/history-v1/api/swagger/projects.js b/services/history-v1/api/swagger/projects.js
index 39026022d0..cd4d2338fa 100644
--- a/services/history-v1/api/swagger/projects.js
+++ b/services/history-v1/api/swagger/projects.js
@@ -321,6 +321,44 @@ exports.paths = {
},
},
},
+ '/projects/{project_id}/latest/history/raw': {
+ get: {
+ 'x-swagger-router-controller': 'projects',
+ operationId: 'getLatestHistoryRaw',
+ tags: ['Project'],
+ description: 'Get the metadata of latest sequence of changes.',
+ parameters: [
+ {
+ name: 'project_id',
+ in: 'path',
+ description: 'project id',
+ required: true,
+ type: 'string',
+ },
+ {
+ name: 'readOnly',
+ in: 'query',
+ description: 'use read only database connection',
+ required: false,
+ type: 'boolean',
+ },
+ ],
+ responses: {
+ 200: {
+ description: 'Success',
+ schema: {
+ $ref: '#/definitions/ChunkResponseRaw',
+ },
+ },
+ 404: {
+ description: 'Not Found',
+ schema: {
+ $ref: '#/definitions/Error',
+ },
+ },
+ },
+ },
+ },
'/projects/{project_id}/latest/persistedHistory': {
get: {
'x-swagger-router-controller': 'projects',
diff --git a/services/history-v1/app.js b/services/history-v1/app.js
index c96a2f5ac3..dd991c1a6d 100644
--- a/services/history-v1/app.js
+++ b/services/history-v1/app.js
@@ -84,26 +84,29 @@ function setupErrorHandling() {
// Handle Swagger errors.
app.use(function (err, req, res, next) {
+ const projectId = req.swagger?.params?.project_id?.value
if (res.headersSent) {
return next(err)
}
if (err.code === 'SCHEMA_VALIDATION_FAILED') {
- logger.error(err)
+ logger.error({ err, projectId }, err.message)
return res.status(HTTPStatus.UNPROCESSABLE_ENTITY).json(err.results)
}
if (err.code === 'INVALID_TYPE' || err.code === 'PATTERN') {
- logger.error(err)
+ logger.error({ err, projectId }, err.message)
return res.status(HTTPStatus.UNPROCESSABLE_ENTITY).json({
message: 'invalid type: ' + err.paramName,
})
}
if (err.code === 'ENUM_MISMATCH') {
+ logger.warn({ err, projectId }, err.message)
return res.status(HTTPStatus.UNPROCESSABLE_ENTITY).json({
message: 'invalid enum value: ' + err.paramName,
})
}
if (err.code === 'REQUIRED') {
+ logger.warn({ err, projectId }, err.message)
return res.status(HTTPStatus.UNPROCESSABLE_ENTITY).json({
message: err.message,
})
@@ -112,7 +115,8 @@ function setupErrorHandling() {
})
app.use(function (err, req, res, next) {
- logger.error(err)
+ const projectId = req.swagger?.params?.project_id?.value
+ logger.error({ err, projectId }, err.message)
if (res.headersSent) {
return next(err)
diff --git a/services/history-v1/backup-verifier-app.mjs b/services/history-v1/backup-verifier-app.mjs
index de427a3765..856a15dd53 100644
--- a/services/history-v1/backup-verifier-app.mjs
+++ b/services/history-v1/backup-verifier-app.mjs
@@ -4,17 +4,24 @@ import '@overleaf/metrics/initialize.js'
import http from 'node:http'
import { fileURLToPath } from 'node:url'
import { promisify } from 'node:util'
+import { setTimeout } from 'node:timers/promises'
import express from 'express'
import logger from '@overleaf/logger'
import Metrics from '@overleaf/metrics'
+import { healthCheck } from './backupVerifier/healthCheck.mjs'
import {
BackupCorruptedError,
- healthCheck,
verifyBlob,
} from './storage/lib/backupVerifier.mjs'
import { mongodb } from './storage/index.js'
import { expressify } from '@overleaf/promise-utils'
import { Blob } from 'overleaf-editor-core'
+import { loadGlobalBlobs } from './storage/lib/blob_store/index.js'
+import { EventEmitter } from 'node:events'
+import {
+ loopRandomProjects,
+ setWriteMetrics,
+} from './backupVerifier/ProjectVerifier.mjs'
const app = express()
@@ -64,20 +71,47 @@ app.use((err, req, res, next) => {
next(err)
})
+const shutdownEmitter = new EventEmitter()
+
+shutdownEmitter.once('shutdown', async code => {
+ logger.info({ code }, 'shutting down')
+ await mongodb.client.close()
+ await setTimeout(100)
+ process.exit(code)
+})
+
+process.on('SIGTERM', () => {
+ shutdownEmitter.emit('shutdown', 0)
+})
+
+process.on('SIGINT', () => {
+ shutdownEmitter.emit('shutdown', 0)
+})
+
/**
* @param {number} port
+ * @param {boolean} enableVerificationLoop
* @return {Promise}
*/
-export async function startApp(port) {
+export async function startApp(port, enableVerificationLoop = true) {
await mongodb.client.connect()
+ await loadGlobalBlobs()
await healthCheck()
const server = http.createServer(app)
await promisify(server.listen.bind(server, port))()
+ enableVerificationLoop && loopRandomProjects(shutdownEmitter)
return server
}
+setWriteMetrics(true)
+
// Run this if we're called directly
if (process.argv[1] === fileURLToPath(import.meta.url)) {
const PORT = parseInt(process.env.PORT || '3102', 10)
- await startApp(PORT)
+ try {
+ await startApp(PORT)
+ } catch (error) {
+ shutdownEmitter.emit('shutdown', 1)
+ logger.error({ error }, 'error starting app')
+ }
}
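
A small sketch of how a test might start this app without kicking off the background verification loop, using the new second argument; the port value is an assumption.

import { startApp } from './backup-verifier-app.mjs'

const server = await startApp(3102, false) // false skips loopRandomProjects
// ...exercise /status and /health_check here...
server.close()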
diff --git a/services/history-v1/backup-worker-app.mjs b/services/history-v1/backup-worker-app.mjs
new file mode 100644
index 0000000000..b21e55aafe
--- /dev/null
+++ b/services/history-v1/backup-worker-app.mjs
@@ -0,0 +1,70 @@
+// @ts-check
+// Metrics must be initialized before importing anything else
+import '@overleaf/metrics/initialize.js'
+import http from 'node:http'
+import { fileURLToPath } from 'node:url'
+import { promisify } from 'node:util'
+import express from 'express'
+import logger from '@overleaf/logger'
+import Metrics from '@overleaf/metrics'
+import { expressify } from '@overleaf/promise-utils'
+import { drainQueue, healthCheck } from './storage/scripts/backup_worker.mjs'
+const app = express()
+
+logger.initialize('history-v1-backup-worker')
+Metrics.open_sockets.monitor()
+Metrics.injectMetricsRoute(app)
+app.use(Metrics.http.monitor(logger))
+Metrics.leaked_sockets.monitor(logger)
+Metrics.event_loop.monitor(logger)
+Metrics.memory.monitor(logger)
+
+app.get('/status', (req, res) => {
+ res.send('history-v1-backup-worker is up')
+})
+
+app.get(
+ '/health_check',
+ expressify(async (req, res) => {
+ await healthCheck()
+ res.sendStatus(200)
+ })
+)
+
+app.use((err, req, res, next) => {
+ req.logger.addFields({ err })
+ req.logger.setLevel('error')
+ next(err)
+})
+
+async function triggerGracefulShutdown(server, signal) {
+ logger.info({ signal }, 'graceful shutdown: started shutdown sequence')
+ await drainQueue()
+ server.close(function () {
+ logger.info({ signal }, 'graceful shutdown: closed server')
+ setTimeout(() => {
+ process.exit(0)
+ }, 1000)
+ })
+}
+
+/**
+ * @param {number} port
+ * @return {Promise<http.Server>}
+ */
+export async function startApp(port) {
+ await healthCheck()
+ const server = http.createServer(app)
+ await promisify(server.listen.bind(server, port))()
+ const signals = ['SIGINT', 'SIGTERM']
+ signals.forEach(signal => {
+ process.on(signal, () => triggerGracefulShutdown(server, signal))
+ })
+ return server
+}
+
+// Run this if we're called directly
+if (process.argv[1] === fileURLToPath(import.meta.url)) {
+ const PORT = parseInt(process.env.PORT || '3103', 10)
+ await startApp(PORT)
+}
diff --git a/services/history-v1/backupVerifier/ProjectMetrics.mjs b/services/history-v1/backupVerifier/ProjectMetrics.mjs
new file mode 100644
index 0000000000..ff37085787
--- /dev/null
+++ b/services/history-v1/backupVerifier/ProjectMetrics.mjs
@@ -0,0 +1,33 @@
+import Metrics from '@overleaf/metrics'
+import { objectIdFromDate } from './utils.mjs'
+import { db } from '../storage/lib/mongodb.js'
+
+const projectsCollection = db.collection('projects')
+
+/**
+ *
+ * @param {Date} beforeTime
+ * @return {Promise}
+ */
+export async function measurePendingChangesBeforeTime(beforeTime) {
+ const pendingChangeCount = await projectsCollection.countDocuments({
+ 'overleaf.backup.pendingChangeAt': {
+ $lt: beforeTime,
+ },
+ })
+
+ Metrics.gauge('backup_verification_pending_changes', pendingChangeCount)
+}
+
+/**
+ *
+ * @param {Date} graceTime
+ * @return {Promise}
+ */
+export async function measureNeverBackedUpProjects(graceTime) {
+ const neverBackedUpCount = await projectsCollection.countDocuments({
+ 'overleaf.backup.lastBackedUpVersion': null,
+ _id: { $lt: objectIdFromDate(graceTime) },
+ })
+ Metrics.gauge('backup_verification_never_backed_up', neverBackedUpCount)
+}
diff --git a/services/history-v1/backupVerifier/ProjectSampler.mjs b/services/history-v1/backupVerifier/ProjectSampler.mjs
new file mode 100644
index 0000000000..93d9a1a31f
--- /dev/null
+++ b/services/history-v1/backupVerifier/ProjectSampler.mjs
@@ -0,0 +1,79 @@
+// @ts-check
+import { objectIdFromDate } from './utils.mjs'
+import { db } from '../storage/lib/mongodb.js'
+import config from 'config'
+
+const projectsCollection = db.collection('projects')
+
+const HAS_PROJECTS_WITHOUT_HISTORY =
+ config.get('hasProjectsWithoutHistory') === 'true'
+
+/**
+ * @param {Date} start
+ * @param {Date} end
+ * @param {number} N
+ * @yields {string}
+ */
+export async function* getProjectsCreatedInDateRangeCursor(start, end, N) {
+ yield* getSampleProjectsCursor(N, [
+ {
+ $match: {
+ _id: {
+ $gt: objectIdFromDate(start),
+ $lte: objectIdFromDate(end),
+ },
+ },
+ },
+ ])
+}
+
+export async function* getProjectsUpdatedInDateRangeCursor(start, end, N) {
+ yield* getSampleProjectsCursor(N, [
+ {
+ $match: {
+ 'overleaf.history.updatedAt': {
+ $gt: start,
+ $lte: end,
+ },
+ },
+ },
+ ])
+}
+
+/**
+ * @typedef {import('mongodb').Document} Document
+ */
+
+/**
+ *
+ * @generator
+ * @param {number} N
+ * @param {Array} preSampleAggregationStages
+ * @yields {string}
+ */
+export async function* getSampleProjectsCursor(
+ N,
+ preSampleAggregationStages = []
+) {
+ const cursor = projectsCollection.aggregate([
+ ...preSampleAggregationStages,
+ { $sample: { size: N } },
+ { $project: { 'overleaf.history.id': 1 } },
+ ])
+
+ let validProjects = 0
+ let hasInvalidProject = false
+
+ for await (const project of cursor) {
+ if (HAS_PROJECTS_WITHOUT_HISTORY && !project.overleaf?.history?.id) {
+ hasInvalidProject = true
+ continue
+ }
+ validProjects++
+ yield project.overleaf.history.id.toString()
+ }
+
+ if (validProjects === 0 && hasInvalidProject) {
+ yield* getSampleProjectsCursor(N, preSampleAggregationStages)
+ }
+}
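
A brief sketch of consuming these async generators; the date range and sample size are illustrative values only.

import { getProjectsCreatedInDateRangeCursor } from './ProjectSampler.mjs'

const start = new Date('2025-01-01')
const end = new Date('2025-02-01')
for await (const historyId of getProjectsCreatedInDateRangeCursor(start, end, 10)) {
  console.log('sampled history id', historyId)
}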
diff --git a/services/history-v1/backupVerifier/ProjectVerifier.mjs b/services/history-v1/backupVerifier/ProjectVerifier.mjs
new file mode 100644
index 0000000000..1e4086b700
--- /dev/null
+++ b/services/history-v1/backupVerifier/ProjectVerifier.mjs
@@ -0,0 +1,320 @@
+// @ts-check
+import { verifyProjectWithErrorContext } from '../storage/lib/backupVerifier.mjs'
+import { promiseMapSettledWithLimit } from '@overleaf/promise-utils'
+import logger from '@overleaf/logger'
+import metrics from '@overleaf/metrics'
+import {
+ getSampleProjectsCursor,
+ getProjectsCreatedInDateRangeCursor,
+ getProjectsUpdatedInDateRangeCursor,
+} from './ProjectSampler.mjs'
+import OError from '@overleaf/o-error'
+import { setTimeout } from 'node:timers/promises'
+
+const MS_PER_30_DAYS = 30 * 24 * 60 * 60 * 1000
+
+const failureCounter = new metrics.prom.Counter({
+ name: 'backup_project_verification_failed',
+ help: 'Number of projects that failed verification',
+ labelNames: ['name'],
+})
+
+const successCounter = new metrics.prom.Counter({
+ name: 'backup_project_verification_succeeded',
+ help: 'Number of projects that succeeded verification',
+})
+
+let WRITE_METRICS = false
+
+/**
+ * @typedef {import('node:events').EventEmitter} EventEmitter
+ */
+
+/**
+ * Allows writing metrics to be enabled or disabled.
+ * @param {Boolean} writeMetrics
+ */
+export function setWriteMetrics(writeMetrics) {
+ WRITE_METRICS = writeMetrics
+}
+
+/**
+ *
+ * @param {Error|unknown} error
+ * @param {string} historyId
+ */
+function handleVerificationError(error, historyId) {
+ const name = error instanceof Error ? error.name : 'UnknownError'
+ logger.error({ historyId, error, name }, 'error verifying project backup')
+
+ WRITE_METRICS && failureCounter.inc({ name })
+
+ return name
+}
+
+/**
+ *
+ * @param {Date} startDate
+ * @param {Date} endDate
+ * @param {number} interval
+ * @returns {Array}
+ */
+function splitJobs(startDate, endDate, interval) {
+ /** @type {Array} */
+ const jobs = []
+ while (startDate < endDate) {
+ const nextStart = new Date(
+ Math.min(startDate.getTime() + interval, endDate.getTime())
+ )
+ jobs.push({ startDate, endDate: nextStart })
+ startDate = nextStart
+ }
+ return jobs
+}
+
+/**
+ *
+ * @param {AsyncGenerator<string>} historyIdCursor
+ * @param {EventEmitter} [eventEmitter]
+ * @param {number} [delay] - Allows a delay between each verification
+ * @return {Promise<{verified: number, total: number, errorTypes: *[], hasFailure: boolean}>}
+ */
+async function verifyProjectsFromCursor(
+ historyIdCursor,
+ eventEmitter,
+ delay = 0
+) {
+ const errorTypes = []
+ let verified = 0
+ let total = 0
+ let receivedShutdownSignal = false
+ if (eventEmitter) {
+ eventEmitter.once('shutdown', () => {
+ receivedShutdownSignal = true
+ })
+ }
+ for await (const historyId of historyIdCursor) {
+ if (receivedShutdownSignal) {
+ break
+ }
+ total++
+ try {
+ await verifyProjectWithErrorContext(historyId)
+ logger.debug({ historyId }, 'verified project backup successfully')
+ WRITE_METRICS && successCounter.inc()
+ verified++
+ } catch (error) {
+ const errorType = handleVerificationError(error, historyId)
+ errorTypes.push(errorType)
+ }
+ if (delay > 0) {
+ await setTimeout(delay)
+ }
+ }
+ return {
+ verified,
+ total,
+ errorTypes,
+ hasFailure: errorTypes.length > 0,
+ }
+}
+
+/**
+ *
+ * @param {number} nProjectsToSample
+ * @param {EventEmitter} [signal]
+ * @param {number} [delay]
+ * @return {Promise<VerificationJobStatus>}
+ */
+export async function verifyRandomProjectSample(
+ nProjectsToSample,
+ signal,
+ delay = 0
+) {
+ const historyIds = await getSampleProjectsCursor(nProjectsToSample)
+ return await verifyProjectsFromCursor(historyIds, signal, delay)
+}
+
+/**
+ * Samples projects with history IDs between the specified dates and verifies them.
+ *
+ * @param {Date} startDate
+ * @param {Date} endDate
+ * @param {number} projectsPerRange
+ * @param {EventEmitter} [signal]
+ * @return {Promise<VerificationJobStatus>}
+ */
+async function verifyRange(startDate, endDate, projectsPerRange, signal) {
+ logger.info({ startDate, endDate }, 'verifying range')
+
+ const results = await verifyProjectsFromCursor(
+ getProjectsCreatedInDateRangeCursor(startDate, endDate, projectsPerRange),
+ signal
+ )
+
+ if (results.total === 0) {
+ logger.debug(
+ { start: startDate, end: endDate },
+ 'No projects found in range'
+ )
+ }
+
+ const jobStatus = {
+ ...results,
+ startDate,
+ endDate,
+ }
+
+ logger.debug(
+ { ...jobStatus, errorTypes: Array.from(new Set(jobStatus.errorTypes)) },
+ 'Verified range'
+ )
+ return jobStatus
+}
+
+/**
+ * @typedef {Object} VerificationJobSpecification
+ * @property {Date} startDate
+ * @property {Date} endDate
+ */
+
+/**
+ * @typedef {import('./types.d.ts').VerificationJobStatus} VerificationJobStatus
+ */
+
+/**
+ * @typedef {Object} VerifyDateRangeOptions
+ * @property {Date} startDate
+ * @property {Date} endDate
+ * @property {number} [interval]
+ * @property {number} [projectsPerRange]
+ * @property {number} [concurrency]
+ * @property {EventEmitter} [signal]
+ */
+
+/**
+ *
+ * @param {VerifyDateRangeOptions} options
+ * @return {Promise<VerificationJobStatus>}
+ */
+export async function verifyProjectsCreatedInDateRange({
+ concurrency = 0,
+ projectsPerRange = 10,
+ startDate,
+ endDate,
+ interval = MS_PER_30_DAYS,
+ signal,
+}) {
+ const jobs = splitJobs(startDate, endDate, interval)
+ if (jobs.length === 0) {
+ throw new OError('Time range could not be split into jobs', {
+ start: startDate,
+ end: endDate,
+ interval,
+ })
+ }
+ const settlements = await promiseMapSettledWithLimit(
+ concurrency,
+ jobs,
+ ({ startDate, endDate }) =>
+ verifyRange(startDate, endDate, projectsPerRange, signal)
+ )
+ return settlements.reduce(
+ /**
+ *
+ * @param {VerificationJobStatus} acc
+ * @param settlement
+ * @return {VerificationJobStatus}
+ */
+ (acc, settlement) => {
+ if (settlement.status !== 'rejected') {
+ if (settlement.value.hasFailure) {
+ acc.hasFailure = true
+ }
+ acc.total += settlement.value.total
+ acc.verified += settlement.value.verified
+ acc.errorTypes = acc.errorTypes.concat(settlement.value.errorTypes)
+ } else {
+ logger.error({ ...settlement.reason }, 'Error processing range')
+ }
+ return acc
+ },
+ /** @type {VerificationJobStatus} */
+ {
+ startDate,
+ endDate,
+ verified: 0,
+ total: 0,
+ hasFailure: false,
+ errorTypes: [],
+ }
+ )
+}
+
+/**
+ * Verifies that projects that have recently gone out of RPO have been updated.
+ *
+ * @param {Date} startDate
+ * @param {Date} endDate
+ * @param {number} nProjects
+ * @param {EventEmitter} [signal]
+ * @return {Promise<VerificationJobStatus>}
+ */
+export async function verifyProjectsUpdatedInDateRange(
+ startDate,
+ endDate,
+ nProjects,
+ signal
+) {
+ logger.debug(
+ { startDate, endDate, nProjects },
+ 'Sampling projects updated in date range'
+ )
+ const results = await verifyProjectsFromCursor(
+ getProjectsUpdatedInDateRangeCursor(startDate, endDate, nProjects),
+ signal
+ )
+
+ if (results.total === 0) {
+ logger.debug(
+ { start: startDate, end: endDate },
+ 'No projects updated recently'
+ )
+ }
+
+ const jobStatus = {
+ ...results,
+ startDate,
+ endDate,
+ }
+
+ logger.debug(
+ { ...jobStatus, errorTypes: Array.from(new Set(jobStatus.errorTypes)) },
+ 'Verified recently updated projects'
+ )
+ return jobStatus
+}
+
+/**
+ *
+ * @param {EventEmitter} signal
+ * @return {void}
+ */
+export function loopRandomProjects(signal) {
+ let shutdown = false
+ signal.on('shutdown', function () {
+ shutdown = true
+ })
+ async function loop() {
+ do {
+ try {
+ const result = await verifyRandomProjectSample(100, signal, 2_000)
+ logger.debug({ result }, 'verified random project sample')
+ } catch (error) {
+ logger.error({ error }, 'error verifying random project sample')
+ }
+ // eslint-disable-next-line no-unmodified-loop-condition
+ } while (!shutdown)
+ }
+ loop()
+}
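
A sketch of driving a one-off ranged verification with a shutdown emitter, mirroring how backup-verifier-app.mjs wires loopRandomProjects; the dates, interval and concurrency are illustrative values.

import { EventEmitter } from 'node:events'
import { verifyProjectsCreatedInDateRange } from './ProjectVerifier.mjs'

const shutdownEmitter = new EventEmitter()
process.on('SIGINT', () => shutdownEmitter.emit('shutdown'))

const status = await verifyProjectsCreatedInDateRange({
  startDate: new Date('2025-01-01'),
  endDate: new Date('2025-03-01'),
  interval: 7 * 24 * 60 * 60 * 1000, // weekly sub-ranges instead of the 30-day default
  projectsPerRange: 5,
  concurrency: 2,
  signal: shutdownEmitter,
})
console.log(status) // { verified, total, hasFailure, errorTypes, startDate, endDate }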
diff --git a/services/history-v1/backupVerifier/healthCheck.mjs b/services/history-v1/backupVerifier/healthCheck.mjs
new file mode 100644
index 0000000000..af998748b5
--- /dev/null
+++ b/services/history-v1/backupVerifier/healthCheck.mjs
@@ -0,0 +1,32 @@
+import config from 'config'
+import { verifyProjectWithErrorContext } from '../storage/lib/backupVerifier.mjs'
+import {
+ measureNeverBackedUpProjects,
+ measurePendingChangesBeforeTime,
+} from './ProjectMetrics.mjs'
+import { getEndDateForRPO, RPO } from './utils.mjs'
+
+/** @type {Array<string>} */
+const HEALTH_CHECK_PROJECTS = JSON.parse(config.get('healthCheckProjects'))
+
+export async function healthCheck() {
+ if (!Array.isArray(HEALTH_CHECK_PROJECTS)) {
+ throw new Error('expected healthCheckProjects to be an array')
+ }
+ if (HEALTH_CHECK_PROJECTS.length !== 2) {
+ throw new Error('expected 2 healthCheckProjects')
+ }
+ if (!HEALTH_CHECK_PROJECTS.some(id => id.length === 24)) {
+ throw new Error('expected mongo id in healthCheckProjects')
+ }
+ if (!HEALTH_CHECK_PROJECTS.some(id => id.length < 24)) {
+ throw new Error('expected postgres id in healthCheckProjects')
+ }
+
+ for (const historyId of HEALTH_CHECK_PROJECTS) {
+ await verifyProjectWithErrorContext(historyId)
+ }
+
+ await measurePendingChangesBeforeTime(getEndDateForRPO(2))
+ await measureNeverBackedUpProjects(getEndDateForRPO(2))
+}
diff --git a/services/history-v1/backupVerifier/types.d.ts b/services/history-v1/backupVerifier/types.d.ts
new file mode 100644
index 0000000000..7bfa4a85ff
--- /dev/null
+++ b/services/history-v1/backupVerifier/types.d.ts
@@ -0,0 +1,8 @@
+export type VerificationJobStatus = {
+ verified: number
+ total: number
+ startDate?: Date
+ endDate?: Date
+ hasFailure: boolean
+  errorTypes: Array<string>
+}
diff --git a/services/history-v1/backupVerifier/utils.mjs b/services/history-v1/backupVerifier/utils.mjs
new file mode 100644
index 0000000000..b2d7ed2d3c
--- /dev/null
+++ b/services/history-v1/backupVerifier/utils.mjs
@@ -0,0 +1,35 @@
+import { ObjectId } from 'mongodb'
+import config from 'config'
+
+export const RPO = parseInt(config.get('backupRPOInMS'), 10)
+
+/**
+ * @param {Date} time
+ * @return {ObjectId}
+ */
+export function objectIdFromDate(time) {
+ return ObjectId.createFromTime(time.getTime() / 1000)
+}
+
+/**
+ * @param {number} [factor] - Multiply RPO by this factor, default is 1
+ * @return {Date}
+ */
+export function getEndDateForRPO(factor = 1) {
+ return new Date(Date.now() - RPO * factor)
+}
+
+/**
+ * Creates a startDate, endDate pair that checks a period of time before the RPO horizon
+ *
+ * @param {number} offset - How many seconds we should check
+ * @return {{endDate: Date, startDate: Date}}
+ */
+export function getDatesBeforeRPO(offset) {
+ const now = new Date()
+ const endDate = new Date(now.getTime() - RPO)
+ return {
+ endDate,
+ startDate: new Date(endDate.getTime() - offset * 1000),
+ }
+}
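
To make the RPO arithmetic concrete: with the default backupRPOInMS of 3600000 (one hour, see config/default.json below), the helpers combine as in this sketch.

import { getEndDateForRPO, getDatesBeforeRPO, objectIdFromDate } from './utils.mjs'

const cutoff = getEndDateForRPO(2) // two RPO periods (two hours) in the past
const { startDate, endDate } = getDatesBeforeRPO(600) // the 600-second window ending at the RPO horizon
const minId = objectIdFromDate(startDate) // ObjectId lower bound for _id range queries
console.log({ cutoff, startDate, endDate, minId })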
diff --git a/services/history-v1/buildscript.txt b/services/history-v1/buildscript.txt
index 9ab6fff24a..4ce8eb63c7 100644
--- a/services/history-v1/buildscript.txt
+++ b/services/history-v1/buildscript.txt
@@ -1,10 +1,10 @@
history-v1
---dependencies=postgres,gcs,mongo,s3
+--dependencies=postgres,gcs,mongo,redis,s3
--docker-repos=us-east1-docker.pkg.dev/overleaf-ops/ol-docker
--env-add=
--env-pass-through=
--esmock-loader=False
---node-version=20.18.0
+--node-version=22.17.0
--public-repo=False
---script-version=4.5.0
---tsconfig-extra-includes=backup-deletion-app.mjs,backup-verifier-app.mjs,api/**/*,migrations/**/*,storage/**/*
+--script-version=4.7.0
+--tsconfig-extra-includes=backup-deletion-app.mjs,backup-verifier-app.mjs,backup-worker-app.mjs,api/**/*,migrations/**/*,storage/**/*
diff --git a/services/history-v1/config/custom-environment-variables.json b/services/history-v1/config/custom-environment-variables.json
index 15c0a9dc01..686ca25407 100644
--- a/services/history-v1/config/custom-environment-variables.json
+++ b/services/history-v1/config/custom-environment-variables.json
@@ -1,5 +1,6 @@
{
"databaseUrl": "HISTORY_CONNECTION_STRING",
+ "databaseUrlReadOnly": "HISTORY_FOLLOWER_CONNECTION_STRING",
"herokuDatabaseUrl": "DATABASE_URL",
"databasePoolMin": "DATABASE_POOL_MIN",
"databasePoolMax": "DATABASE_POOL_MAX",
@@ -65,6 +66,7 @@
},
"healthCheckBlobs": "HEALTH_CHECK_BLOBS",
"healthCheckProjects": "HEALTH_CHECK_PROJECTS",
+ "backupRPOInMS": "BACKUP_RPO_IN_MS",
"minSoftDeletionPeriodDays": "MIN_SOFT_DELETION_PERIOD_DAYS",
"mongo": {
"uri": "MONGO_CONNECTION_STRING"
@@ -81,5 +83,30 @@
"clusterWorkers": "CLUSTER_WORKERS",
"maxFileUploadSize": "MAX_FILE_UPLOAD_SIZE",
"httpsOnly": "HTTPS_ONLY",
- "httpRequestTimeout": "HTTP_REQUEST_TIMEOUT"
+ "httpRequestTimeout": "HTTP_REQUEST_TIMEOUT",
+ "historyBufferLevel": "HISTORY_BUFFER_LEVEL",
+ "forcePersistBuffer": "FORCE_PERSIST_BUFFER",
+ "nextHistoryBufferLevel": "NEXT_HISTORY_BUFFER_LEVEL",
+ "nextHistoryBufferLevelRolloutPercentage": "NEXT_HISTORY_BUFFER_LEVEL_ROLLOUT_PERCENTAGE",
+ "redis": {
+ "queue": {
+ "host": "QUEUES_REDIS_HOST",
+ "password": "QUEUES_REDIS_PASSWORD",
+ "port": "QUEUES_REDIS_PORT"
+ },
+ "history": {
+ "host": "HISTORY_REDIS_HOST",
+ "password": "HISTORY_REDIS_PASSWORD",
+ "port": "HISTORY_REDIS_PORT"
+ },
+ "lock": {
+ "host": "REDIS_HOST",
+ "password": "REDIS_PASSWORD",
+ "port": "REDIS_PORT"
+ }
+ },
+ "projectHistory": {
+ "host": "PROJECT_HISTORY_HOST",
+ "port": "PROJECT_HISTORY_PORT"
+ }
}
diff --git a/services/history-v1/config/default.json b/services/history-v1/config/default.json
index 74c5bcd237..e7732fe3f7 100644
--- a/services/history-v1/config/default.json
+++ b/services/history-v1/config/default.json
@@ -23,12 +23,14 @@
}
}
},
+ "backupRPOInMS": "3600000",
"chunkStore": {
"historyStoreConcurrency": "4"
},
"zipStore": {
"zipTimeoutMs": "360000"
},
+ "hasProjectsWithoutHistory": false,
"minSoftDeletionPeriodDays": "90",
"maxDeleteKeys": "1000",
"useDeleteObjects": "true",
@@ -37,5 +39,8 @@
"databasePoolMin": "2",
"databasePoolMax": "10",
"httpsOnly": "false",
- "httpRequestTimeout": "300000"
+ "httpRequestTimeout": "300000",
+ "projectHistory": {
+ "port": "3054"
+ }
}
diff --git a/services/history-v1/config/test.json b/services/history-v1/config/test.json
index ab192b0a92..c38e28e564 100644
--- a/services/history-v1/config/test.json
+++ b/services/history-v1/config/test.json
@@ -1,5 +1,6 @@
{
"databaseUrl": "postgres://overleaf:overleaf@postgres/overleaf-history-v1-test",
+ "databaseUrlReadOnly": "postgres://read_only:password@postgres/overleaf-history-v1-test",
"persistor": {
"backend": "gcs",
"gcs": {
@@ -35,6 +36,7 @@
},
"healthCheckBlobs": "[\"42/f70d7bba4ae1f07682e0358bd7a2068094fc023b\",\"000000000000000000000042/98d5521fe746bc2d11761edab5d0829bee286009\"]",
"healthCheckProjects": "[\"42\",\"000000000000000000000042\"]",
+ "backupRPOInMS": "360000",
"maxDeleteKeys": "3",
"useDeleteObjects": "false",
"mongo": {
diff --git a/services/history-v1/docker-compose.ci.yml b/services/history-v1/docker-compose.ci.yml
index 4950c091b8..cf6ec3357d 100644
--- a/services/history-v1/docker-compose.ci.yml
+++ b/services/history-v1/docker-compose.ci.yml
@@ -19,6 +19,10 @@ services:
image: ci/$PROJECT_NAME:$BRANCH_NAME-$BUILD_NUMBER
environment:
ELASTIC_SEARCH_DSN: es:9200
+ REDIS_HOST: redis
+ QUEUES_REDIS_HOST: redis
+ HISTORY_REDIS_HOST: redis
+ ANALYTICS_QUEUES_REDIS_HOST: redis
MONGO_HOST: mongo
POSTGRES_HOST: postgres
AWS_S3_ENDPOINT: https://minio:9000
@@ -35,8 +39,11 @@ services:
NODE_OPTIONS: "--unhandled-rejections=strict"
volumes:
- ./test/acceptance/certs:/certs
+ - ../../bin/shared/wait_for_it:/overleaf/bin/shared/wait_for_it
depends_on:
mongo:
+ condition: service_started
+ redis:
condition: service_healthy
postgres:
condition: service_healthy
@@ -49,6 +56,7 @@ services:
gcs:
condition: service_healthy
user: node
+ entrypoint: /overleaf/bin/shared/wait_for_it mongo:27017 --timeout=0 --
command: npm run test:acceptance
@@ -59,26 +67,40 @@ services:
- ./:/tmp/build/
command: tar -czf /tmp/build/build.tar.gz --exclude=build.tar.gz --exclude-vcs .
user: root
- mongo:
- image: mongo:6.0.13
- command: --replSet overleaf
+ redis:
+ image: redis:7.4.3
healthcheck:
- test: "mongosh --quiet localhost/test --eval 'quit(db.runCommand({ ping: 1 }).ok ? 0 : 1)'"
+ test: ping="$$(redis-cli ping)" && [ "$$ping" = 'PONG' ]
interval: 1s
retries: 20
+
+ mongo:
+ image: mongo:8.0.11
+ command: --replSet overleaf
+ volumes:
+ - ../../bin/shared/mongodb-init-replica-set.js:/docker-entrypoint-initdb.d/mongodb-init-replica-set.js
+ environment:
+ MONGO_INITDB_DATABASE: sharelatex
+ extra_hosts:
+ # Required when using the automatic database setup for initializing the
+ # replica set. This override is not needed when running the setup after
+ # starting up mongo.
+ - mongo:127.0.0.1
postgres:
image: postgres:10
environment:
POSTGRES_USER: overleaf
POSTGRES_PASSWORD: overleaf
POSTGRES_DB: overleaf-history-v1-test
+ volumes:
+ - ./test/acceptance/pg-init/:/docker-entrypoint-initdb.d/
healthcheck:
test: pg_isready --quiet
interval: 1s
retries: 20
certs:
- image: node:20.18.0
+ image: node:22.17.0
volumes:
- ./test/acceptance/certs:/certs
working_dir: /certs
diff --git a/services/history-v1/docker-compose.yml b/services/history-v1/docker-compose.yml
index 17c41a1dad..3a33882d28 100644
--- a/services/history-v1/docker-compose.yml
+++ b/services/history-v1/docker-compose.yml
@@ -17,6 +17,7 @@ services:
working_dir: /overleaf/services/history-v1
environment:
MOCHA_GREP: ${MOCHA_GREP}
+ LOG_LEVEL: ${LOG_LEVEL:-}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
command: npm run --silent test:unit
@@ -32,9 +33,14 @@ services:
- ../../node_modules:/overleaf/node_modules
- ../../libraries:/overleaf/libraries
- ./test/acceptance/certs:/certs
+ - ../../bin/shared/wait_for_it:/overleaf/bin/shared/wait_for_it
working_dir: /overleaf/services/history-v1
environment:
ELASTIC_SEARCH_DSN: es:9200
+ REDIS_HOST: redis
+ HISTORY_REDIS_HOST: redis
+ QUEUES_REDIS_HOST: redis
+ ANALYTICS_QUEUES_REDIS_HOST: redis
MONGO_HOST: mongo
POSTGRES_HOST: postgres
AWS_S3_ENDPOINT: https://minio:9000
@@ -47,12 +53,14 @@ services:
GCS_PROJECT_ID: fake
STORAGE_EMULATOR_HOST: http://gcs:9090/storage/v1
MOCHA_GREP: ${MOCHA_GREP}
- LOG_LEVEL: ERROR
+ LOG_LEVEL: ${LOG_LEVEL:-}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
user: node
depends_on:
mongo:
+ condition: service_started
+ redis:
condition: service_healthy
postgres:
condition: service_healthy
@@ -64,29 +72,44 @@ services:
condition: service_completed_successfully
gcs:
condition: service_healthy
+ entrypoint: /overleaf/bin/shared/wait_for_it mongo:27017 --timeout=0 --
command: npm run --silent test:acceptance
- mongo:
- image: mongo:6.0.13
- command: --replSet overleaf
+ redis:
+ image: redis:7.4.3
healthcheck:
- test: "mongosh --quiet localhost/test --eval 'quit(db.runCommand({ ping: 1 }).ok ? 0 : 1)'"
+ test: ping=$$(redis-cli ping) && [ "$$ping" = 'PONG' ]
interval: 1s
retries: 20
+ mongo:
+ image: mongo:8.0.11
+ command: --replSet overleaf
+ volumes:
+ - ../../bin/shared/mongodb-init-replica-set.js:/docker-entrypoint-initdb.d/mongodb-init-replica-set.js
+ environment:
+ MONGO_INITDB_DATABASE: sharelatex
+ extra_hosts:
+ # Required when using the automatic database setup for initializing the
+ # replica set. This override is not needed when running the setup after
+ # starting up mongo.
+ - mongo:127.0.0.1
+
postgres:
image: postgres:10
environment:
POSTGRES_USER: overleaf
POSTGRES_PASSWORD: overleaf
POSTGRES_DB: overleaf-history-v1-test
+ volumes:
+ - ./test/acceptance/pg-init/:/docker-entrypoint-initdb.d/
healthcheck:
test: pg_isready --host=localhost --quiet
interval: 1s
retries: 20
certs:
- image: node:20.18.0
+ image: node:22.17.0
volumes:
- ./test/acceptance/certs:/certs
working_dir: /certs
diff --git a/services/history-v1/migrations/20250415210802_add_chunks_closed.js b/services/history-v1/migrations/20250415210802_add_chunks_closed.js
new file mode 100644
index 0000000000..b5c1d577f9
--- /dev/null
+++ b/services/history-v1/migrations/20250415210802_add_chunks_closed.js
@@ -0,0 +1,27 @@
+// @ts-check
+
+/**
+ * @import { Knex } from "knex"
+ */
+
+/**
+ * @param { Knex } knex
+ * @returns { Promise }
+ */
+exports.up = async function (knex) {
+ await knex.raw(`
+ ALTER TABLE chunks
+ ADD COLUMN closed BOOLEAN NOT NULL DEFAULT FALSE
+ `)
+}
+
+/**
+ * @param { Knex } knex
+ * @returns { Promise }
+ */
+exports.down = async function (knex) {
+ await knex.raw(`
+ ALTER TABLE chunks
+ DROP COLUMN closed
+ `)
+}
diff --git a/services/history-v1/package.json b/services/history-v1/package.json
index a207ea4eb0..4796cafd03 100644
--- a/services/history-v1/package.json
+++ b/services/history-v1/package.json
@@ -7,6 +7,7 @@
"private": true,
"dependencies": {
"@google-cloud/secret-manager": "^5.6.0",
+ "@overleaf/fetch-utils": "*",
"@overleaf/logger": "*",
"@overleaf/metrics": "*",
"@overleaf/mongo-utils": "*",
@@ -20,11 +21,12 @@
"basic-auth": "^2.0.1",
"bluebird": "^3.7.2",
"body-parser": "^1.20.3",
+ "bull": "^4.16.5",
"bunyan": "^1.8.12",
"check-types": "^11.1.2",
"command-line-args": "^3.0.3",
- "config": "^1.19.0",
- "express": "^4.21.0",
+ "config": "^3.3.12",
+ "express": "^4.21.2",
"fs-extra": "^9.0.1",
"generic-pool": "^2.1.1",
"helmet": "^3.22.0",
@@ -32,8 +34,10 @@
"jsonwebtoken": "^9.0.0",
"knex": "^2.4.0",
"lodash": "^4.17.19",
- "mongodb": "6.10.0",
+ "mongodb": "6.12.0",
"overleaf-editor-core": "*",
+ "p-limit": "^6.2.0",
+ "p-queue": "^8.1.0",
"pg": "^8.7.1",
"pg-query-stream": "^4.2.4",
"swagger-tools": "^0.10.4",
@@ -46,7 +50,8 @@
"benny": "^3.7.1",
"chai": "^4.3.6",
"chai-as-promised": "^7.1.1",
- "mocha": "^10.2.0",
+ "chai-exclude": "^2.1.1",
+ "mocha": "^11.1.0",
"node-fetch": "^2.7.0",
"sinon": "^9.0.2",
"swagger-client": "^3.10.0",
diff --git a/services/history-v1/storage/index.js b/services/history-v1/storage/index.js
index 7fd1d589ea..6bc81f60e8 100644
--- a/services/history-v1/storage/index.js
+++ b/services/history-v1/storage/index.js
@@ -2,11 +2,16 @@ exports.BatchBlobStore = require('./lib/batch_blob_store')
exports.blobHash = require('./lib/blob_hash')
exports.HashCheckBlobStore = require('./lib/hash_check_blob_store')
exports.chunkStore = require('./lib/chunk_store')
+exports.redisBuffer = require('./lib/chunk_store/redis')
exports.historyStore = require('./lib/history_store').historyStore
exports.knex = require('./lib/knex')
exports.mongodb = require('./lib/mongodb')
+exports.redis = require('./lib/redis')
exports.persistChanges = require('./lib/persist_changes')
exports.persistor = require('./lib/persistor')
+exports.persistBuffer = require('./lib/persist_buffer')
+exports.commitChanges = require('./lib/commit_changes')
+exports.queueChanges = require('./lib/queue_changes')
exports.ProjectArchive = require('./lib/project_archive')
exports.streams = require('./lib/streams')
exports.temp = require('./lib/temp')
@@ -15,3 +20,9 @@ exports.zipStore = require('./lib/zip_store')
const { BlobStore, loadGlobalBlobs } = require('./lib/blob_store')
exports.BlobStore = BlobStore
exports.loadGlobalBlobs = loadGlobalBlobs
+
+const { InvalidChangeError } = require('./lib/errors')
+exports.InvalidChangeError = InvalidChangeError
+
+const { ChunkVersionConflictError } = require('./lib/chunk_store/errors')
+exports.ChunkVersionConflictError = ChunkVersionConflictError
diff --git a/services/history-v1/storage/lib/assert.js b/services/history-v1/storage/lib/assert.js
index d0ce318b4d..91f24da7e0 100644
--- a/services/history-v1/storage/lib/assert.js
+++ b/services/history-v1/storage/lib/assert.js
@@ -1,5 +1,7 @@
'use strict'
+const OError = require('@overleaf/o-error')
+
const check = require('check-types')
const { Blob } = require('overleaf-editor-core')
@@ -7,41 +9,58 @@ const assert = check.assert
const MONGO_ID_REGEXP = /^[0-9a-f]{24}$/
const POSTGRES_ID_REGEXP = /^[1-9][0-9]{0,9}$/
-const PROJECT_ID_REGEXP = /^([0-9a-f]{24}|[1-9][0-9]{0,9})$/
+const MONGO_OR_POSTGRES_ID_REGEXP = /^([0-9a-f]{24}|[1-9][0-9]{0,9})$/
function transaction(transaction, message) {
assert.function(transaction, message)
}
function blobHash(arg, message) {
- assert.match(arg, Blob.HEX_HASH_RX, message)
+ try {
+ assert.match(arg, Blob.HEX_HASH_RX, message)
+ } catch (error) {
+ throw OError.tag(error, message, { arg })
+ }
+}
+
+/**
+ * A project id is a string that contains either an integer (for projects stored in Postgres) or 24
+ * hex digits (for projects stored in Mongo)
+ */
+function projectId(arg, message) {
+ try {
+ assert.match(arg, MONGO_OR_POSTGRES_ID_REGEXP, message)
+ } catch (error) {
+ throw OError.tag(error, message, { arg })
+ }
}
/**
* A chunk id is a string that contains either an integer (for projects stored in Postgres) or 24
* hex digits (for projects stored in Mongo)
*/
-function projectId(arg, message) {
- assert.match(arg, PROJECT_ID_REGEXP, message)
-}
-
-/**
- * A chunk id is either a number (for projects stored in Postgres) or a 24
- * character string (for projects stored in Mongo)
- */
function chunkId(arg, message) {
- const valid = check.integer(arg) || check.match(arg, MONGO_ID_REGEXP)
- if (!valid) {
- throw new TypeError(message)
+ try {
+ assert.match(arg, MONGO_OR_POSTGRES_ID_REGEXP, message)
+ } catch (error) {
+ throw OError.tag(error, message, { arg })
}
}
function mongoId(arg, message) {
- assert.match(arg, MONGO_ID_REGEXP)
+ try {
+ assert.match(arg, MONGO_ID_REGEXP, message)
+ } catch (error) {
+ throw OError.tag(error, message, { arg })
+ }
}
function postgresId(arg, message) {
- assert.match(arg, POSTGRES_ID_REGEXP, message)
+ try {
+ assert.match(arg, POSTGRES_ID_REGEXP, message)
+ } catch (error) {
+ throw OError.tag(error, message, { arg })
+ }
}
module.exports = {
diff --git a/services/history-v1/storage/lib/backupArchiver.mjs b/services/history-v1/storage/lib/backupArchiver.mjs
new file mode 100644
index 0000000000..c6f0e3755d
--- /dev/null
+++ b/services/history-v1/storage/lib/backupArchiver.mjs
@@ -0,0 +1,474 @@
+// @ts-check
+import path from 'node:path'
+import projectKey from './project_key.js'
+import {
+ chunksBucket,
+ backupPersistor,
+ projectBlobsBucket,
+ globalBlobsBucket as backupGlobalBlobsBucket,
+} from './backupPersistor.mjs'
+import core, { Chunk, History } from 'overleaf-editor-core'
+import {
+ GLOBAL_BLOBS,
+ makeProjectKey,
+ getStringLengthOfFile,
+ makeGlobalKey,
+} from './blob_store/index.js'
+import streams from './streams.js'
+import objectPersistor from '@overleaf/object-persistor'
+import OError from '@overleaf/o-error'
+import chunkStore from './chunk_store/index.js'
+import logger from '@overleaf/logger'
+import fs from 'node:fs'
+import { pipeline } from 'node:stream/promises'
+import withTmpDir from '../../api/controllers/with_tmp_dir.js'
+import { loadChunk } from './backupVerifier.mjs'
+import globalBlobPersistor from './persistor.js'
+import config from 'config'
+import { NoKEKMatchedError } from '@overleaf/object-persistor/src/Errors.js'
+
+const globalBlobsBucket = config.get('blobStore.globalBucket')
+
+class BackupBlobStore {
+ /**
+ *
+ * @param {string} historyId
+ * @param {string} tmp
+ * @param {CachedPerProjectEncryptedS3Persistor} persistor
+ * @param {boolean} useBackupGlobalBlobs
+ */
+ constructor(historyId, tmp, persistor, useBackupGlobalBlobs) {
+ this.historyId = historyId
+ this.tmp = tmp
+ this.blobs = new Map()
+ this.persistor = persistor
+ this.useBackupGlobalBlobs = useBackupGlobalBlobs
+ }
+
+ /**
+ * Required for BlobStore interface - not supported.
+ *
+ * @template T
+ * @param {string} hash
+   * @return {Promise<T>}
+ */
+ async getObject(hash) {
+ try {
+ const stream = await this.getStream(hash)
+ const buffer = await streams.readStreamToBuffer(stream)
+ return JSON.parse(buffer.toString())
+ } catch (err) {
+ logger.warn({ err, hash }, 'Failed to fetch chunk blob')
+ throw err
+ }
+ }
+
+ /**
+ *
+ * @param {Set} hashes
+ * @return {Promise}
+ */
+ async fetchBlobs(hashes) {
+ for await (const hash of hashes) {
+ if (this.blobs.has(hash)) return
+ const path = `${this.tmp}/${hash}`
+ /** @type {core.Blob} */
+ let blob
+ /** @type {NodeJS.ReadableStream} */
+ let blobStream
+ if (GLOBAL_BLOBS.has(hash)) {
+ try {
+ const blobData = await this.fetchGlobalBlob(hash)
+ await pipeline(blobData.stream, fs.createWriteStream(path))
+ blob = blobData.blob
+ } catch (err) {
+ logger.warn({ hash, err }, 'Failed to fetch global blob')
+ continue
+ }
+ } else {
+ try {
+ blobStream = await fetchBlob(this.historyId, hash, this.persistor)
+ await pipeline(blobStream, fs.createWriteStream(path))
+ blob = await this.makeBlob(hash, path)
+ } catch (err) {
+ logger.warn({ err, hash }, 'Failed to fetch chunk blob')
+ continue
+ }
+ }
+
+ this.blobs.set(hash, blob)
+ }
+ }
+
+ /**
+ *
+ * @param {string} hash
+ * @return {Promise<{ blob: core.Blob, stream: NodeJS.ReadableStream }>}
+ */
+ async fetchGlobalBlob(hash) {
+ const globalBlob = GLOBAL_BLOBS.get(hash)
+ if (!globalBlob) {
+ throw new Error('blob does not exist or is not a global blob')
+ }
+ let stream
+
+ const key = makeGlobalKey(hash)
+
+ if (this.useBackupGlobalBlobs) {
+ stream = await this.persistor.getObjectStream(
+ backupGlobalBlobsBucket,
+ key
+ )
+ } else {
+ stream = await globalBlobPersistor.getObjectStream(globalBlobsBucket, key)
+ }
+ return { blob: globalBlob.blob, stream }
+ }
+
+ /**
+ *
+ * @param {string} hash
+ * @param {string} pathname
+ * @return {Promise}
+ */
+ async makeBlob(hash, pathname) {
+ const stat = await fs.promises.stat(pathname)
+ const byteLength = stat.size
+ const stringLength = await getStringLengthOfFile(byteLength, pathname)
+ if (stringLength) {
+ return new core.Blob(hash, byteLength, stringLength)
+ }
+ return new core.Blob(hash, byteLength)
+ }
+
+ /**
+ *
+ * @param {string} hash
+ * @return {Promise}
+ */
+ async getString(hash) {
+ const stream = await this.getStream(hash)
+ const buffer = await streams.readStreamToBuffer(stream)
+ return buffer.toString()
+ }
+
+ /**
+ *
+ * @param {string} hash
+ * @return {Promise}
+ */
+ async getStream(hash) {
+ return fs.createReadStream(this.getBlobPathname(hash))
+ }
+
+ /**
+ *
+ * @param {string} hash
+ * @return {Promise}
+ */
+ async getBlob(hash) {
+ return this.blobs.get(hash)
+ }
+
+ /**
+ *
+ * @param {string} hash
+ * @return {string}
+ */
+ getBlobPathname(hash) {
+ return path.join(this.tmp, hash)
+ }
+}
+
+/**
+ * @typedef {(import('@overleaf/object-persistor/src/PerProjectEncryptedS3Persistor.js').CachedPerProjectEncryptedS3Persistor)} CachedPerProjectEncryptedS3Persistor
+ */
+
+/**
+ * @typedef {(import('archiver').Archiver)} Archiver
+ */
+
+/**
+ * @typedef {(import('overleaf-editor-core').FileMap)} FileMap
+ */
+
+/**
+ *
+ * @param historyId
+ * @return {Promise}
+ */
+async function getProjectPersistor(historyId) {
+ try {
+ return await backupPersistor.forProjectRO(
+ projectBlobsBucket,
+ makeProjectKey(historyId, '')
+ )
+ } catch (error) {
+ if (error instanceof NoKEKMatchedError) {
+ logger.info({}, 'no kek matched')
+ }
+ throw new BackupPersistorError(
+ 'Failed to get project persistor',
+ { historyId },
+ error instanceof Error ? error : undefined
+ )
+ }
+}
+
+/**
+ *
+ * @param persistor
+ * @param {string} key
+ * @return {Promise<{chunkData: any, buffer: Buffer}>}
+ */
+async function loadChunkByKey(persistor, key) {
+ try {
+ const buf = await streams.gunzipStreamToBuffer(
+ await persistor.getObjectStream(chunksBucket, key)
+ )
+ return { chunkData: JSON.parse(buf.toString('utf-8')), buffer: buf }
+ } catch (err) {
+ if (err instanceof objectPersistor.Errors.NotFoundError) {
+ throw new Chunk.NotPersistedError('chunk not found')
+ }
+ if (err instanceof Error) {
+ throw OError.tag(err, 'Failed to load chunk', { key })
+ }
+ throw err
+ }
+}
+
+/**
+ *
+ * @param {string} historyId
+ * @param {string} hash
+ * @param {CachedPerProjectEncryptedS3Persistor} persistor
+ * @return {Promise}
+ */
+async function fetchBlob(historyId, hash, persistor) {
+ const path = makeProjectKey(historyId, hash)
+ return await persistor.getObjectStream(projectBlobsBucket, path, {
+ autoGunzip: true,
+ })
+}
+
+/**
+ * @typedef {object} AddChunkOptions
+ * @property {string} [prefix] Should include trailing slash (if length > 0)
+ * @property {boolean} [useBackupGlobalBlobs]
+ */
+
+/**
+ *
+ * @param {History} history
+ * @param {Archiver} archive
+ * @param {CachedPerProjectEncryptedS3Persistor} projectCache
+ * @param {string} historyId
+ * @param {AddChunkOptions} [options]
+ * @returns {Promise}
+ */
+async function addChunkToArchive(
+ history,
+ archive,
+ projectCache,
+ historyId,
+ { prefix = '', useBackupGlobalBlobs = false } = {}
+) {
+ const chunkBlobs = new Set()
+ history.findBlobHashes(chunkBlobs)
+
+ await withTmpDir('recovery-blob-', async tmpDir => {
+ const blobStore = new BackupBlobStore(
+ historyId,
+ tmpDir,
+ projectCache,
+ useBackupGlobalBlobs
+ )
+ await blobStore.fetchBlobs(chunkBlobs)
+
+ await history.loadFiles('lazy', blobStore)
+
+ const snapshot = history.getSnapshot()
+ snapshot.applyAll(history.getChanges())
+
+ const filePaths = snapshot.getFilePathnames()
+
+ if (filePaths.length === 0) {
+ logger.warn(
+ { historyId, projectVersion: snapshot.projectVersion },
+ 'No files found in snapshot backup'
+ )
+ }
+ for (const filePath of filePaths) {
+ /** @type {core.File | null | undefined} */
+ const file = snapshot.getFile(filePath)
+ if (!file) {
+ logger.error({ filePath }, 'File not found in snapshot')
+ continue
+ }
+
+ try {
+ await file.load('eager', blobStore)
+ } catch (err) {
+ logger.error(
+ { filePath, err },
+ 'Failed to load file from snapshot, skipping'
+ )
+ continue
+ }
+
+ const hash = file.getHash()
+
+ /** @type {string | fs.ReadStream | null | undefined} */
+ let content = file.getContent({ filterTrackedDeletes: true })
+
+ if (content === null) {
+ if (!hash) {
+ logger.error({ filePath }, 'File does not have a hash')
+ continue
+ }
+ const blob = await blobStore.getBlob(hash)
+ if (!blob) {
+ logger.error({ filePath }, 'Blob not found in blob store')
+ continue
+ }
+ content = await blobStore.getStream(hash)
+ }
+ archive.append(content, {
+ name: `${prefix}${filePath}`,
+ })
+ }
+ })
+}
+
+/**
+ *
+ * @param {string} historyId
+ * @return {Promise}
+ */
+async function findStartVersionOfLatestChunk(historyId) {
+ const backend = chunkStore.getBackend(historyId)
+ const chunk = await backend.getLatestChunk(historyId, { readOnly: true })
+ if (!chunk) {
+ throw new Error('Latest chunk could not be loaded')
+ }
+ return chunk.startVersion
+}
+
+/**
+ * Restore a project from the latest snapshot
+ *
+ * There is an assumption that the database backup has been restored.
+ *
+ * @param {Archiver} archive
+ * @param {string} historyId
+ * @param {boolean} [useBackupGlobalBlobs]
+ * @return {Promise}
+ */
+export async function archiveLatestChunk(
+ archive,
+ historyId,
+ useBackupGlobalBlobs = false
+) {
+ logger.info({ historyId, useBackupGlobalBlobs }, 'Archiving latest chunk')
+
+ const projectCache = await getProjectPersistor(historyId)
+
+ const startVersion = await findStartVersionOfLatestChunk(historyId)
+
+ const backedUpChunkRaw = await loadChunk(
+ historyId,
+ startVersion,
+ projectCache
+ )
+
+ const backedUpChunk = History.fromRaw(backedUpChunkRaw)
+
+ await addChunkToArchive(backedUpChunk, archive, projectCache, historyId, {
+ useBackupGlobalBlobs,
+ })
+
+ return archive
+}
+
+/**
+ * Fetches all raw blobs from the project and adds them to the archive.
+ *
+ * @param {string} historyId
+ * @param {Archiver} archive
+ * @param {CachedPerProjectEncryptedS3Persistor} projectCache
+ * @return {Promise}
+ */
+async function addRawBlobsToArchive(historyId, archive, projectCache) {
+ const key = projectKey.format(historyId)
+ const { contents } = await projectCache.listDirectory(projectBlobsBucket, key)
+ for (const blobRecord of contents) {
+ if (!blobRecord.Key) {
+ logger.debug({ blobRecord }, 'no key')
+ continue
+ }
+ const blobKey = blobRecord.Key
+ try {
+ const stream = await projectCache.getObjectStream(
+ projectBlobsBucket,
+ blobKey,
+ { autoGunzip: true }
+ )
+ archive.append(stream, {
+ name: path.join(historyId, 'blobs', blobKey),
+ })
+ } catch (err) {
+ logger.warn(
+ { err, path: blobRecord.Key },
+ 'Failed to append blob to archive'
+ )
+ }
+ }
+}
+
+/**
+ * Download raw files from the backup.
+ *
+ * This can work without the database being backed up.
+ *
+ * It will split the project into chunks per directory and download the blobs alongside the chunk.
+ *
+ * @param {Archiver} archive
+ * @param {string} historyId
+ * @param {boolean} [useBackupGlobalBlobs]
+ * @return {Promise}
+ */
+export async function archiveRawProject(
+ archive,
+ historyId,
+ useBackupGlobalBlobs = false
+) {
+ const projectCache = await getProjectPersistor(historyId)
+
+ const { contents: chunks } = await projectCache.listDirectory(
+ chunksBucket,
+ projectKey.format(historyId)
+ )
+
+ if (chunks.length === 0) {
+ throw new Error('No chunks found')
+ }
+
+ for (const chunkRecord of chunks) {
+ if (!chunkRecord.Key) {
+ logger.debug({ chunkRecord }, 'no key')
+ continue
+ }
+ const chunkId = chunkRecord.Key.split('/').pop()
+ logger.debug({ chunkId, key: chunkRecord.Key }, 'Processing chunk')
+
+ const { buffer } = await loadChunkByKey(projectCache, chunkRecord.Key)
+
+ archive.append(buffer, {
+ name: `${historyId}/chunks/${chunkId}/chunk.json`,
+ })
+ }
+ await addRawBlobsToArchive(historyId, archive, projectCache)
+}
+
+export class BackupPersistorError extends OError {}
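
A sketch of a calling script for the new archiver helpers; the archiver package usage and the output path are assumptions about the caller, not part of this module.

import fs from 'node:fs'
import archiver from 'archiver'
import { archiveRawProject } from './backupArchiver.mjs'

const historyId = process.argv[2]
const archive = archiver('zip')
archive.pipe(fs.createWriteStream(`/tmp/${historyId}-raw-backup.zip`)) // assumed destination
await archiveRawProject(archive, historyId)
await archive.finalize()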
diff --git a/services/history-v1/storage/lib/backupBlob.mjs b/services/history-v1/storage/lib/backupBlob.mjs
index 2602c0818f..8ae1a6a901 100644
--- a/services/history-v1/storage/lib/backupBlob.mjs
+++ b/services/history-v1/storage/lib/backupBlob.mjs
@@ -1,6 +1,6 @@
// @ts-check
import { backupPersistor, projectBlobsBucket } from './backupPersistor.mjs'
-import { GLOBAL_BLOBS, makeProjectKey } from './blob_store/index.js'
+import { GLOBAL_BLOBS, makeProjectKey, BlobStore } from './blob_store/index.js'
import Stream from 'node:stream'
import fs from 'node:fs'
import Crypto from 'node:crypto'
@@ -11,6 +11,7 @@ import logger from '@overleaf/logger/logging-manager.js'
import { AlreadyWrittenError } from '@overleaf/object-persistor/src/Errors.js'
import metrics from '@overleaf/metrics'
import zLib from 'node:zlib'
+import Path from 'node:path'
const HIGHWATER_MARK = 1024 * 1024
@@ -18,6 +19,10 @@ const HIGHWATER_MARK = 1024 * 1024
* @typedef {import("overleaf-editor-core").Blob} Blob
*/
+/**
+ * @typedef {import("@overleaf/object-persistor/src/PerProjectEncryptedS3Persistor").CachedPerProjectEncryptedS3Persistor} CachedPerProjectEncryptedS3Persistor
+ */
+
/**
* Increment a metric to record the outcome of a backup operation.
*
@@ -28,6 +33,34 @@ function recordBackupConclusion(status, reason = 'none') {
metrics.inc('blob_backed_up', 1, { status, reason })
}
+/**
+ * Downloads a blob to a specified directory
+ *
+ * @param {string} historyId - The history ID of the project the blob belongs to
+ * @param {Blob} blob - The blob to download
+ * @param {string} tmpDir - The directory path where the blob will be downloaded
+ * @returns {Promise<string>} The full path where the blob was downloaded
+ */
+export async function downloadBlobToDir(historyId, blob, tmpDir) {
+ const blobStore = new BlobStore(historyId)
+ const blobHash = blob.getHash()
+ const src = await blobStore.getStream(blobHash)
+ const filePath = Path.join(tmpDir, `${historyId}-${blobHash}`)
+ try {
+ const dst = fs.createWriteStream(filePath, {
+ highWaterMark: HIGHWATER_MARK,
+ flags: 'wx',
+ })
+ await Stream.promises.pipeline(src, dst)
+ return filePath
+ } catch (error) {
+ try {
+ await fs.promises.unlink(filePath)
+ } catch {}
+ throw error
+ }
+}
+
/**
* Performs the actual upload of the blob to the backup storage.
*
@@ -36,7 +69,7 @@ function recordBackupConclusion(status, reason = 'none') {
* @param {string} path - The path to the file to upload (should have been stored on disk already)
* @return {Promise}
*/
-export async function uploadBlobToBackup(historyId, blob, path) {
+export async function uploadBlobToBackup(historyId, blob, path, persistor) {
const md5 = Crypto.createHash('md5')
const filePathCompressed = path + '.gz'
let backupSource
@@ -70,7 +103,6 @@ export async function uploadBlobToBackup(historyId, blob, path) {
)
}
const key = makeProjectKey(historyId, blob.getHash())
- const persistor = await backupPersistor.forProject(projectBlobsBucket, key)
await persistor.sendStream(
projectBlobsBucket,
key,
@@ -119,7 +151,7 @@ async function _convertLegacyHistoryIdToProjectId(historyId) {
* @param {string} hash
* @return {Promise}
*/
-async function storeBlobBackup(projectId, hash) {
+export async function storeBlobBackup(projectId, hash) {
await backedUpBlobs.updateOne(
{ _id: new ObjectId(projectId) },
{ $addToSet: { blobs: new Binary(Buffer.from(hash, 'hex')) } },
@@ -152,9 +184,10 @@ export async function _blobIsBackedUp(projectId, hash) {
* @param {string} historyId - history ID for a project (can be postgres format or mongo format)
* @param {Blob} blob - The blob that is being backed up
* @param {string} tmpPath - The path to a temporary file storing the contents of the blob.
+ * @param {CachedPerProjectEncryptedS3Persistor} [persistor] - The persistor to use (optional)
* @return {Promise}
*/
-export async function backupBlob(historyId, blob, tmpPath) {
+export async function backupBlob(historyId, blob, tmpPath, persistor) {
const hash = blob.getHash()
let projectId = historyId
@@ -183,10 +216,22 @@ export async function backupBlob(historyId, blob, tmpPath) {
logger.warn({ error }, 'Failed to check if blob is backed up')
// We'll try anyway - we'll catch the error if it was backed up
}
-
+ // If we weren't passed a persistor for this project, create one.
+  // This will fetch the key from AWS, so it's preferable to use
+ // the same persistor for all blobs in a project where possible.
+ if (!persistor) {
+ logger.debug(
+ { historyId, hash },
+ 'warning: persistor not passed to backupBlob'
+ )
+ }
+ persistor ??= await backupPersistor.forProject(
+ projectBlobsBucket,
+ makeProjectKey(historyId, '')
+ )
try {
logger.debug({ projectId, hash }, 'Starting blob backup')
- await uploadBlobToBackup(historyId, blob, tmpPath)
+ await uploadBlobToBackup(historyId, blob, tmpPath, persistor)
await storeBlobBackup(projectId, hash)
recordBackupConclusion('success')
} catch (error) {
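
A sketch of the calling pattern the new optional persistor argument is designed for: fetch the per-project key once, then reuse the persistor for every blob in that project. The blob/tmp-path pairs are assumed to come from the caller.

import { backupBlob } from './backupBlob.mjs'
import { backupPersistor, projectBlobsBucket } from './backupPersistor.mjs'
import { makeProjectKey } from './blob_store/index.js'

async function backupProjectBlobs(historyId, blobsWithPaths) {
  // create the per-project persistor once, instead of once per blob
  const persistor = await backupPersistor.forProject(
    projectBlobsBucket,
    makeProjectKey(historyId, '')
  )
  for (const { blob, tmpPath } of blobsWithPaths) {
    await backupBlob(historyId, blob, tmpPath, persistor)
  }
}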
diff --git a/services/history-v1/storage/lib/backupDeletion.mjs b/services/history-v1/storage/lib/backupDeletion.mjs
index b52c31a160..ef50609753 100644
--- a/services/history-v1/storage/lib/backupDeletion.mjs
+++ b/services/history-v1/storage/lib/backupDeletion.mjs
@@ -51,7 +51,8 @@ async function deleteProjectBackup(projectId) {
throw new NotReadyToDelete('refusing to delete non-expired project')
}
- const historyId = deletedProject.deleterData.deletedProjectOverleafHistoryId
+ const historyId =
+ deletedProject.deleterData.deletedProjectOverleafHistoryId?.toString()
if (!historyId) {
throw new NotReadyToDelete(
'refusing to delete project with unknown historyId'
diff --git a/services/history-v1/storage/lib/backupGenerator.mjs b/services/history-v1/storage/lib/backupGenerator.mjs
new file mode 100644
index 0000000000..d8f1b0e99a
--- /dev/null
+++ b/services/history-v1/storage/lib/backupGenerator.mjs
@@ -0,0 +1,153 @@
+/**
+ * Provides a generator function to back up project chunks and blobs.
+ */
+
+import chunkStore from './chunk_store/index.js'
+
+import {
+ GLOBAL_BLOBS, // NOTE: must call loadGlobalBlobs() before using this
+ BlobStore,
+} from './blob_store/index.js'
+
+import assert from './assert.js'
+
+async function lookBehindForSeenBlobs(
+ projectId,
+ chunk,
+ lastBackedUpVersion,
+ seenBlobs
+) {
+ if (chunk.startVersion === 0) {
+ return // this is the first chunk, no need to check for blobs in the previous chunk
+ }
+ if (chunk.startVersion > 0 && lastBackedUpVersion > chunk.startVersion) {
+ return // the snapshot in this chunk has already been backed up
+ }
+ if (
+ chunk.startVersion > 0 &&
+ lastBackedUpVersion === chunk.startVersion // same as previousChunk.endVersion
+ ) {
+ // the snapshot in this chunk has not been backed up
+ // so we find the set of backed up blobs from the previous chunk
+ const previousChunk = await chunkStore.loadAtVersion(
+ projectId,
+ lastBackedUpVersion,
+ { persistedOnly: true }
+ )
+ const previousChunkHistory = previousChunk.getHistory()
+ previousChunkHistory.findBlobHashes(seenBlobs)
+ }
+}
+
+/**
+ * Records blob hashes that have been previously seen in a chunk's history.
+ *
+ * @param {Object} chunk - The chunk containing history data
+ * @param {number} currentBackedUpVersion - The version number that has been backed up
+ * @param {Set} seenBlobs - Set to collect previously seen blob hashes
+ * @returns {void}
+ */
+function recordPreviouslySeenBlobs(chunk, currentBackedUpVersion, seenBlobs) {
+ // We need to look at the chunk and decide how far we have backed up.
+ // If we have not backed up this chunk at all, we need to back up the blobs
+ // in the snapshot. Otherwise we need to back up the blobs in the changes
+ // that have occurred since the last backup.
+ const history = chunk.getHistory()
+ const startVersion = chunk.getStartVersion()
+ if (currentBackedUpVersion === 0) {
+ // If we have only backed up version 0 (i.e. the first change)
+ // then that includes the initial snapshot, so we consider
+ // the blobs of the initial snapshot as seen. If the project
+ // has not been backed up at all then currentBackedUpVersion
+ // will be undefined.
+ history.snapshot.findBlobHashes(seenBlobs)
+ } else if (currentBackedUpVersion > startVersion) {
+ history.snapshot.findBlobHashes(seenBlobs)
+ for (let i = 0; i < currentBackedUpVersion - startVersion; i++) {
+ history.changes[i].findBlobHashes(seenBlobs)
+ }
+ }
+}
+
+/**
+ * Collects new blob objects that need to be backed up from a given chunk.
+ *
+ * @param {Object} chunk - The chunk object containing history data
+ * @param {Object} blobStore - Storage interface for retrieving blobs
+ * @param {Set} seenBlobs - Set of blob hashes that have already been processed
+ * @returns {Promise} Array of blob objects that need to be backed up
+ * @throws {Error} If blob retrieval fails
+ */
+async function collectNewBlobsForBackup(chunk, blobStore, seenBlobs) {
+ /** @type {Set} */
+ const blobHashes = new Set()
+ const history = chunk.getHistory()
+ // Get all the blobs in this chunk, then exclude the seenBlobs and global blobs
+ history.findBlobHashes(blobHashes)
+ const blobsToBackup = await blobStore.getBlobs(
+ [...blobHashes].filter(
+ hash =>
+ hash &&
+ !seenBlobs.has(hash) &&
+ (!GLOBAL_BLOBS.has(hash) || GLOBAL_BLOBS.get(hash).demoted)
+ )
+ )
+ return blobsToBackup
+}
+
+/**
+ * Asynchronously generates backups for a project based on provided versions.
+ * @param {string} projectId - The ID of the project's history to back up.
+ * @param {number} lastBackedUpVersion - The last version that was successfully backed up.
+ * @yields {AsyncGenerator<{ chunkRecord: object, chunkToBackup: object, chunkBuffer: Buffer, blobsToBackup: object[] }>}
+ * Yields chunk records and corresponding data needed for backups.
+ */
+export async function* backupGenerator(projectId, lastBackedUpVersion) {
+ assert.projectId(projectId, 'bad projectId')
+ assert.maybe.integer(lastBackedUpVersion, 'bad lastBackedUpVersion')
+
+ const blobStore = new BlobStore(projectId)
+
+ /** @type {Set} */
+ const seenBlobs = new Set() // records the blobs that are already backed up
+
+ const firstPendingVersion =
+ lastBackedUpVersion >= 0 ? lastBackedUpVersion + 1 : 0
+ let isStartingChunk = true
+ let currentBackedUpVersion = lastBackedUpVersion
+ const chunkRecordIterator = chunkStore.getProjectChunksFromVersion(
+ projectId,
+ firstPendingVersion
+ )
+
+ for await (const chunkRecord of chunkRecordIterator) {
+ const { chunk, chunkBuffer } = await chunkStore.loadByChunkRecord(
+ projectId,
+ chunkRecord
+ )
+
+ if (isStartingChunk) {
+ await lookBehindForSeenBlobs(
+ projectId,
+ chunkRecord,
+ lastBackedUpVersion,
+ seenBlobs
+ )
+ isStartingChunk = false
+ }
+
+ recordPreviouslySeenBlobs(chunk, currentBackedUpVersion, seenBlobs)
+
+ const blobsToBackup = await collectNewBlobsForBackup(
+ chunk,
+ blobStore,
+ seenBlobs
+ )
+
+ yield { chunkRecord, chunkToBackup: chunk, chunkBuffer, blobsToBackup }
+
+ // After we generate a backup of this chunk, mark the backed up blobs as seen
+ blobsToBackup.forEach(blob => seenBlobs.add(blob.getHash()))
+ currentBackedUpVersion = chunkRecord.endVersion
+ }
+}
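
The generator yields one record per chunk, together with the raw chunk buffer and the blobs that still need backing up; the caller is responsible for uploading them and advancing its own bookkeeping. A consumption sketch (the handler callbacks are assumed stand-ins for the real upload logic):

import { backupGenerator } from './backupGenerator.mjs'
import { loadGlobalBlobs } from './blob_store/index.js'

async function runBackup(projectId, lastBackedUpVersion, handlers) {
  await loadGlobalBlobs() // must run before GLOBAL_BLOBS is consulted
  const generator = backupGenerator(projectId, lastBackedUpVersion)
  for await (const { chunkRecord, chunkBuffer, blobsToBackup } of generator) {
    for (const blob of blobsToBackup) {
      await handlers.backupBlob(blob) // e.g. download + backupBlob(...)
    }
    await handlers.backupChunk(chunkRecord, chunkBuffer)
    // chunkRecord.endVersion is the new last backed-up version after this step
  }
}
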
diff --git a/services/history-v1/storage/lib/backupPersistor.mjs b/services/history-v1/storage/lib/backupPersistor.mjs
index 72ab9d45e3..8f80e5faaf 100644
--- a/services/history-v1/storage/lib/backupPersistor.mjs
+++ b/services/history-v1/storage/lib/backupPersistor.mjs
@@ -4,6 +4,7 @@ import Path from 'node:path'
import _ from 'lodash'
import config from 'config'
import { SecretManagerServiceClient } from '@google-cloud/secret-manager'
+import OError from '@overleaf/o-error'
import {
PerProjectEncryptedS3Persistor,
RootKeyEncryptionKey,
@@ -55,17 +56,24 @@ if (DELETION_ONLY) {
getRawRootKeyEncryptionKeys = () => new Promise(_resolve => {})
}
+const PROJECT_FOLDER_REGEX =
+ /^\d{3}\/\d{3}\/\d{3,}\/$|^[0-9a-f]{3}\/[0-9a-f]{3}\/[0-9a-f]{18}\/$/
+
/**
* @param {string} bucketName
* @param {string} path
* @return {string}
*/
-function pathToProjectFolder(bucketName, path) {
+export function pathToProjectFolder(bucketName, path) {
switch (bucketName) {
case deksBucket:
case chunksBucket:
case projectBlobsBucket:
- return Path.join(...path.split('/').slice(0, 3)) + '/'
+ const projectFolder = Path.join(...path.split('/').slice(0, 3)) + '/'
+ if (!PROJECT_FOLDER_REGEX.test(projectFolder)) {
+ throw new OError('invalid project folder', { bucketName, path })
+ }
+ return projectFolder
default:
throw new Error(`${bucketName} does not store per-project files`)
}
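
Exporting pathToProjectFolder and validating its result guards against deriving a bogus project folder, and hence the wrong encryption-key scope, from an unexpected object key. The folder built from the first three path segments must look like either a padded postgres history id or the hex form used for mongo history ids. Illustrative calls (the key suffixes are made up):

import { pathToProjectFolder, projectBlobsBucket } from './backupPersistor.mjs'

// Postgres-style history id: three numeric segments.
pathToProjectFolder(projectBlobsBucket, '987/654/321/aa/bbccdd') // '987/654/321/'

// Mongo-style history id: 3 + 3 + 18 hex characters.
pathToProjectFolder(projectBlobsBucket, 'abc/def/0123456789abcdef12/aa/bbccdd')
// 'abc/def/0123456789abcdef12/'

// Anything else now throws OError('invalid project folder').
pathToProjectFolder(projectBlobsBucket, 'not/a/project-key/aa/bbccdd') // throws
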
diff --git a/services/history-v1/storage/lib/backupVerifier.mjs b/services/history-v1/storage/lib/backupVerifier.mjs
index 55247b91d7..6e767b21ba 100644
--- a/services/history-v1/storage/lib/backupVerifier.mjs
+++ b/services/history-v1/storage/lib/backupVerifier.mjs
@@ -1,14 +1,24 @@
// @ts-check
-import config from 'config'
import OError from '@overleaf/o-error'
-import { backupPersistor, projectBlobsBucket } from './backupPersistor.mjs'
-import { Blob } from 'overleaf-editor-core'
-import { BlobStore, makeProjectKey } from './blob_store/index.js'
+import chunkStore from '../lib/chunk_store/index.js'
+import {
+ backupPersistor,
+ chunksBucket,
+ projectBlobsBucket,
+} from './backupPersistor.mjs'
+import { Blob, Chunk, History } from 'overleaf-editor-core'
+import { BlobStore, GLOBAL_BLOBS, makeProjectKey } from './blob_store/index.js'
import blobHash from './blob_hash.js'
import { NotFoundError } from '@overleaf/object-persistor/src/Errors.js'
+import logger from '@overleaf/logger'
+import path from 'node:path'
+import projectKey from './project_key.js'
+import streams from './streams.js'
+import objectPersistor from '@overleaf/object-persistor'
+import { getEndDateForRPO } from '../../backupVerifier/utils.mjs'
/**
- * @typedef {import("@overleaf/object-persistor/src/PerProjectEncryptedS3Persistor").CachedPerProjectEncryptedS3Persistor} CachedPerProjectEncryptedS3Persistor
+ * @typedef {import("@overleaf/object-persistor/src/PerProjectEncryptedS3Persistor.js").CachedPerProjectEncryptedS3Persistor} CachedPerProjectEncryptedS3Persistor
*/
/**
@@ -20,13 +30,13 @@ export async function verifyBlob(historyId, hash) {
}
/**
+ *
* @param {string} historyId
- * @param {Array} hashes
+ * @return {Promise}
*/
-export async function verifyBlobs(historyId, hashes) {
- let projectCache
+async function getProjectPersistor(historyId) {
try {
- projectCache = await backupPersistor.forProjectRO(
+ return await backupPersistor.forProjectRO(
projectBlobsBucket,
makeProjectKey(historyId, '')
)
@@ -36,16 +46,19 @@ export async function verifyBlobs(historyId, hashes) {
}
throw err
}
- await verifyBlobsWithCache(historyId, projectCache, hashes)
}
/**
* @param {string} historyId
- * @param {CachedPerProjectEncryptedS3Persistor} projectCache
* @param {Array} hashes
+ * @param {CachedPerProjectEncryptedS3Persistor} [projectCache]
*/
-export async function verifyBlobsWithCache(historyId, projectCache, hashes) {
+export async function verifyBlobs(historyId, hashes, projectCache) {
if (hashes.length === 0) throw new Error('bug: empty hashes')
+
+ if (!projectCache) {
+ projectCache = await getProjectPersistor(historyId)
+ }
const blobStore = new BlobStore(historyId)
for (const hash of hashes) {
const path = makeProjectKey(historyId, hash)
@@ -58,41 +71,150 @@ export async function verifyBlobsWithCache(historyId, projectCache, hashes) {
})
} catch (err) {
if (err instanceof NotFoundError) {
- throw new BackupCorruptedError('missing blob')
+ throw new BackupCorruptedMissingBlobError('missing blob', {
+ path,
+ hash,
+ })
}
throw err
}
const backupHash = await blobHash.fromStream(blob.getByteLength(), stream)
if (backupHash !== hash) {
- throw new BackupCorruptedError('hash mismatch for backed up blob', {
- path,
- hash,
- backupHash,
- })
+ throw new BackupCorruptedInvalidBlobError(
+ 'hash mismatch for backed up blob',
+ {
+ path,
+ hash,
+ backupHash,
+ }
+ )
}
}
}
-export class BackupCorruptedError extends OError {}
-
-export async function healthCheck() {
- /** @type {Array} */
- const HEALTH_CHECK_BLOBS = JSON.parse(config.get('healthCheckBlobs'))
- if (HEALTH_CHECK_BLOBS.length !== 2) {
- throw new Error('expected 2 healthCheckBlobs')
- }
- if (!HEALTH_CHECK_BLOBS.some(path => path.split('/')[0].length === 24)) {
- throw new Error('expected mongo id in healthCheckBlobs')
- }
- if (!HEALTH_CHECK_BLOBS.some(path => path.split('/')[0].length < 24)) {
- throw new Error('expected postgres id in healthCheckBlobs')
- }
- if (HEALTH_CHECK_BLOBS.some(path => path.split('/')[1]?.length !== 40)) {
- throw new Error('expected hash in healthCheckBlobs')
- }
-
- for (const path of HEALTH_CHECK_BLOBS) {
- const [historyId, hash] = path.split('/')
- await verifyBlob(historyId, hash)
+/**
+ * @param {string} historyId
+ * @param {Date} [endTimestamp]
+ */
+export async function verifyProjectWithErrorContext(
+ historyId,
+ endTimestamp = getEndDateForRPO()
+) {
+ try {
+ await verifyProject(historyId, endTimestamp)
+ } catch (err) {
+ // @ts-ignore err is Error instance
+ throw OError.tag(err, 'verifyProject', { historyId, endTimestamp })
}
}
+
+/**
+ *
+ * @param {string} historyId
+ * @param {number} startVersion
+ * @param {CachedPerProjectEncryptedS3Persistor} backupPersistorForProject
+ * @return {Promise}
+ */
+export async function loadChunk(
+ historyId,
+ startVersion,
+ backupPersistorForProject
+) {
+ const key = path.join(
+ projectKey.format(historyId),
+ projectKey.pad(startVersion)
+ )
+ try {
+ const buf = await streams.gunzipStreamToBuffer(
+ await backupPersistorForProject.getObjectStream(chunksBucket, key)
+ )
+ return JSON.parse(buf.toString('utf-8'))
+ } catch (err) {
+ if (err instanceof objectPersistor.Errors.NotFoundError) {
+ throw new Chunk.NotPersistedError(historyId)
+ }
+ if (err instanceof Error) {
+ throw OError.tag(err, 'Failed to load chunk', { historyId, startVersion })
+ }
+ throw err
+ }
+}
+
+/**
+ * @param {string} historyId
+ * @param {Date} endTimestamp
+ */
+export async function verifyProject(historyId, endTimestamp) {
+ const backend = chunkStore.getBackend(historyId)
+ const [first, last] = await Promise.all([
+ backend.getFirstChunkBeforeTimestamp(historyId, endTimestamp),
+ backend.getLastActiveChunkBeforeTimestamp(historyId, endTimestamp),
+ ])
+
+ const chunksRecordsToVerify = [
+ {
+ chunkId: first.id,
+ chunkLabel: 'first',
+ startVersion: first.startVersion,
+ },
+ ]
+ if (first.startVersion !== last.startVersion) {
+ chunksRecordsToVerify.push({
+ chunkId: last.id,
+ chunkLabel: 'last before RPO',
+ startVersion: last.startVersion,
+ })
+ }
+
+ const projectCache = await getProjectPersistor(historyId)
+
+ const chunks = await Promise.all(
+ chunksRecordsToVerify.map(async chunk => {
+ try {
+ return History.fromRaw(
+ await loadChunk(historyId, chunk.startVersion, projectCache)
+ )
+ } catch (err) {
+ if (err instanceof Chunk.NotPersistedError) {
+ throw new BackupRPOViolationChunkNotBackedUpError(
+ 'BackupRPOviolation: chunk not backed up',
+ chunk
+ )
+ }
+ throw err
+ }
+ })
+ )
+ const seenBlobs = new Set()
+ const blobsToVerify = []
+ for (const chunk of chunks) {
+ /** @type {Set} */
+ const chunkBlobs = new Set()
+ chunk.findBlobHashes(chunkBlobs)
+ let hasAddedBlobFromThisChunk = false
+ for (const blobHash of chunkBlobs) {
+ if (seenBlobs.has(blobHash)) continue // old blob
+ if (GLOBAL_BLOBS.has(blobHash)) continue // global blob
+ seenBlobs.add(blobHash)
+ if (!hasAddedBlobFromThisChunk) {
+ blobsToVerify.push(blobHash)
+ hasAddedBlobFromThisChunk = true
+ }
+ }
+ }
+ if (blobsToVerify.length === 0) {
+ logger.debug(
+ {
+ historyId,
+ chunksRecordsToVerify: chunksRecordsToVerify.map(c => c.chunkId),
+ },
+ 'chunks contain no blobs to verify'
+ )
+ return
+ }
+ await verifyBlobs(historyId, blobsToVerify, projectCache)
+}
+
+export class BackupCorruptedError extends OError {}
+export class BackupRPOViolationError extends OError {}
+export class BackupCorruptedMissingBlobError extends BackupCorruptedError {}
+export class BackupCorruptedInvalidBlobError extends BackupCorruptedError {}
+export class BackupRPOViolationChunkNotBackedUpError extends OError {}
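
A sketch of how a caller can use the reworked verifier and the more specific error classes to classify failures (the reporting shape is illustrative):

import {
  verifyProjectWithErrorContext,
  BackupCorruptedError,
  BackupRPOViolationChunkNotBackedUpError,
} from './backupVerifier.mjs'

async function checkProjectBackup(historyId) {
  try {
    await verifyProjectWithErrorContext(historyId)
    return { status: 'ok' }
  } catch (err) {
    if (err instanceof BackupCorruptedError) {
      // covers BackupCorruptedMissingBlobError and BackupCorruptedInvalidBlobError
      return { status: 'corrupted', error: err.message }
    }
    if (err instanceof BackupRPOViolationChunkNotBackedUpError) {
      return { status: 'rpo-violation', error: err.message }
    }
    throw err // unexpected failure
  }
}
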
diff --git a/services/history-v1/storage/lib/backup_store/index.js b/services/history-v1/storage/lib/backup_store/index.js
new file mode 100644
index 0000000000..da7944786a
--- /dev/null
+++ b/services/history-v1/storage/lib/backup_store/index.js
@@ -0,0 +1,212 @@
+const { Binary, ObjectId } = require('mongodb')
+const { projects, backedUpBlobs } = require('../mongodb')
+const OError = require('@overleaf/o-error')
+
+// List projects with pending backups older than the specified interval
+function listPendingBackups(timeIntervalMs = 0, limit = null) {
+ const cutoffTime = new Date(Date.now() - timeIntervalMs)
+ const options = {
+ projection: { 'overleaf.backup.pendingChangeAt': 1 },
+ sort: { 'overleaf.backup.pendingChangeAt': 1 },
+ }
+
+ // Apply limit if provided
+ if (limit) {
+ options.limit = limit
+ }
+
+ const cursor = projects.find(
+ {
+ 'overleaf.backup.pendingChangeAt': {
+ $exists: true,
+ $lt: cutoffTime,
+ },
+ },
+ options
+ )
+ return cursor
+}
+
+// List projects that have never been backed up and are older than the specified interval
+function listUninitializedBackups(timeIntervalMs = 0, limit = null) {
+ const cutoffTimeInSeconds = (Date.now() - timeIntervalMs) / 1000
+ const options = {
+ projection: { _id: 1 },
+ sort: { _id: 1 },
+ }
+ // Apply limit if provided
+ if (limit) {
+ options.limit = limit
+ }
+ const cursor = projects.find(
+ {
+ 'overleaf.backup.lastBackedUpVersion': null,
+ _id: {
+ $lt: ObjectId.createFromTime(cutoffTimeInSeconds),
+ },
+ },
+ options
+ )
+ return cursor
+}
+
+// Retrieve the history ID for a given project without giving direct access to the
+// projects collection.
+
+async function getHistoryId(projectId) {
+ const project = await projects.findOne(
+ { _id: new ObjectId(projectId) },
+ {
+ projection: {
+ 'overleaf.history.id': 1,
+ },
+ }
+ )
+ if (!project) {
+ throw new Error('Project not found')
+ }
+ return project.overleaf.history.id
+}
+
+async function getBackupStatus(projectId) {
+ const project = await projects.findOne(
+ { _id: new ObjectId(projectId) },
+ {
+ projection: {
+ 'overleaf.history': 1,
+ 'overleaf.backup': 1,
+ },
+ }
+ )
+ if (!project) {
+ throw new Error('Project not found')
+ }
+ return {
+ backupStatus: project.overleaf.backup,
+ historyId: `${project.overleaf.history.id}`,
+ currentEndVersion: project.overleaf.history.currentEndVersion,
+ currentEndTimestamp: project.overleaf.history.currentEndTimestamp,
+ }
+}
+
+async function setBackupVersion(
+ projectId,
+ previousBackedUpVersion,
+ currentBackedUpVersion,
+ currentBackedUpAt
+) {
+ // FIXME: include a check to handle race conditions
+ // to make sure only one process updates the version numbers
+ const result = await projects.updateOne(
+ {
+ _id: new ObjectId(projectId),
+ 'overleaf.backup.lastBackedUpVersion': previousBackedUpVersion,
+ },
+ {
+ $set: {
+ 'overleaf.backup.lastBackedUpVersion': currentBackedUpVersion,
+ 'overleaf.backup.lastBackedUpAt': currentBackedUpAt,
+ },
+ }
+ )
+ if (result.matchedCount === 0 || result.modifiedCount === 0) {
+ throw new OError('Failed to update backup version', {
+ previousBackedUpVersion,
+ currentBackedUpVersion,
+ currentBackedUpAt,
+ result,
+ })
+ }
+}
+
+async function updateCurrentMetadataIfNotSet(projectId, latestChunkMetadata) {
+ await projects.updateOne(
+ {
+ _id: new ObjectId(projectId),
+ 'overleaf.history.currentEndVersion': { $exists: false },
+ 'overleaf.history.currentEndTimestamp': { $exists: false },
+ },
+ {
+ $set: {
+ 'overleaf.history.currentEndVersion': latestChunkMetadata.endVersion,
+ 'overleaf.history.currentEndTimestamp':
+ latestChunkMetadata.endTimestamp,
+ },
+ }
+ )
+}
+
+/**
+ * Updates the pending change timestamp for a project's backup status
+ * @param {string} projectId - The ID of the project to update
+ * @param {Date} backupStartTime - The timestamp to set for pending changes
+ * @returns {Promise}
+ *
+ * If the project's last backed up version matches the current end version,
+ * the pending change timestamp is removed. Otherwise, it's set to the provided
+ * backup start time.
+ */
+async function updatePendingChangeTimestamp(projectId, backupStartTime) {
+ await projects.updateOne({ _id: new ObjectId(projectId) }, [
+ {
+ $set: {
+ 'overleaf.backup.pendingChangeAt': {
+ $cond: {
+ if: {
+ $eq: [
+ '$overleaf.backup.lastBackedUpVersion',
+ '$overleaf.history.currentEndVersion',
+ ],
+ },
+ then: '$$REMOVE',
+ else: backupStartTime,
+ },
+ },
+ },
+ },
+ ])
+}
+
+async function getBackedUpBlobHashes(projectId) {
+ const result = await backedUpBlobs.findOne(
+ { _id: new ObjectId(projectId) },
+ { projection: { blobs: 1 } }
+ )
+ if (!result) {
+ return new Set()
+ }
+ const hashes = result.blobs.map(b => b.buffer.toString('hex'))
+ return new Set(hashes)
+}
+
+async function unsetBackedUpBlobHashes(projectId, hashes) {
+ const binaryHashes = hashes.map(h => new Binary(Buffer.from(h, 'hex')))
+ const result = await backedUpBlobs.findOneAndUpdate(
+ { _id: new ObjectId(projectId) },
+ {
+ $pullAll: {
+ blobs: binaryHashes,
+ },
+ },
+ { returnDocument: 'after' }
+ )
+ if (result && result.blobs.length === 0) {
+ await backedUpBlobs.deleteOne({
+ _id: new ObjectId(projectId),
+ blobs: { $size: 0 },
+ })
+ }
+ return result
+}
+
+module.exports = {
+ getHistoryId,
+ getBackupStatus,
+ setBackupVersion,
+ updateCurrentMetadataIfNotSet,
+ updatePendingChangeTimestamp,
+ listPendingBackups,
+ listUninitializedBackups,
+ getBackedUpBlobHashes,
+ unsetBackedUpBlobHashes,
+}
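
A sketch of the bookkeeping flow a backup job could follow with these helpers; the actual chunk and blob uploads in the middle are elided and error handling is omitted:

const {
  getBackupStatus,
  setBackupVersion,
  updatePendingChangeTimestamp,
} = require('./backup_store')

async function recordBackupRun(projectId, backedUpToVersion) {
  const backupStartTime = new Date()
  const { backupStatus } = await getBackupStatus(projectId)
  const previousVersion = backupStatus?.lastBackedUpVersion

  // ... back up chunks and blobs up to backedUpToVersion here ...

  // Guarded update: only succeeds if no other process advanced the version.
  await setBackupVersion(
    projectId,
    previousVersion,
    backedUpToVersion,
    backupStartTime
  )
  // Clears pendingChangeAt if the backup caught up with currentEndVersion,
  // otherwise resets it to the start of this run.
  await updatePendingChangeTimestamp(projectId, backupStartTime)
}
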
diff --git a/services/history-v1/storage/lib/blob_hash.js b/services/history-v1/storage/lib/blob_hash.js
index 6309637d08..10ac64b87b 100644
--- a/services/history-v1/storage/lib/blob_hash.js
+++ b/services/history-v1/storage/lib/blob_hash.js
@@ -63,7 +63,7 @@ exports.fromString = function blobHashFromString(string) {
* Compute the git blob hash for the content of a file
*
* @param {string} filePath
- * @return {string} hexadecimal SHA-1 hash
+ * @return {Promise<string>} hexadecimal SHA-1 hash
*/
exports.fromFile = function blobHashFromFile(pathname) {
assert.string(pathname, 'blobHash: bad pathname')
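
The corrected annotation reflects that fromFile is asynchronous, so callers have to await it; a minimal sketch:

const blobHash = require('./blob_hash')

async function hashLocalFile(pathname) {
  // Resolves to the 40-character hexadecimal git blob SHA-1 of the file.
  return await blobHash.fromFile(pathname)
}
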
diff --git a/services/history-v1/storage/lib/blob_store/index.js b/services/history-v1/storage/lib/blob_store/index.js
index c770080f7f..033e288554 100644
--- a/services/history-v1/storage/lib/blob_store/index.js
+++ b/services/history-v1/storage/lib/blob_store/index.js
@@ -24,6 +24,7 @@ const logger = require('@overleaf/logger')
/** @import { Readable } from 'stream' */
+/** @type {Map} */
const GLOBAL_BLOBS = new Map()
function makeGlobalKey(hash) {
@@ -343,6 +344,11 @@ class BlobStore {
return blob
}
+ /**
+ *
+ * @param {Array} hashes
+ * @return {Promise<*[]>}
+ */
async getBlobs(hashes) {
assert.array(hashes, 'bad hashes')
const nonGlobalHashes = []
@@ -355,6 +361,9 @@ class BlobStore {
nonGlobalHashes.push(hash)
}
}
+ if (nonGlobalHashes.length === 0) {
+ return blobs // to avoid unnecessary database lookup
+ }
const projectBlobs = await this.backend.findBlobs(
this.projectId,
nonGlobalHashes
@@ -424,6 +433,7 @@ module.exports = {
getProjectBlobsBatch,
loadGlobalBlobs,
makeProjectKey,
+ makeGlobalKey,
makeBlobForFile,
getStringLengthOfFile,
GLOBAL_BLOBS,
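
With the early return, getBlobs skips the findBlobs query entirely when every requested hash resolves from the in-memory global blob map (demoted global blobs are still looked up per project, matching the filter used by the backup generator). Sketch of a caller, with the history id and hashes as placeholders:

const { BlobStore, loadGlobalBlobs } = require('./blob_store')

async function fetchBlobMetadata(historyId, hashes) {
  await loadGlobalBlobs() // populate GLOBAL_BLOBS once, e.g. at startup
  const blobStore = new BlobStore(historyId)
  // Global blobs are served from memory; only the remaining hashes trigger
  // a database lookup against the project's backend.
  return await blobStore.getBlobs(hashes)
}
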
diff --git a/services/history-v1/storage/lib/blob_store/postgres.js b/services/history-v1/storage/lib/blob_store/postgres.js
index 7f66d2d24d..1cedeec5d7 100644
--- a/services/history-v1/storage/lib/blob_store/postgres.js
+++ b/services/history-v1/storage/lib/blob_store/postgres.js
@@ -13,7 +13,7 @@ async function initialize(projectId) {
* Return blob metadata for the given project and hash
*/
async function findBlob(projectId, hash) {
- assert.postgresId(projectId, `bad projectId ${projectId}`)
+ assert.postgresId(projectId, 'bad projectId')
projectId = parseInt(projectId, 10)
assert.blobHash(hash, 'bad hash')
@@ -35,7 +35,7 @@ async function findBlob(projectId, hash) {
* @return {Promise.<Array.<Blob>>} no guarantee on order
*/
async function findBlobs(projectId, hashes) {
- assert.postgresId(projectId, `bad projectId ${projectId}`)
+ assert.postgresId(projectId, 'bad projectId')
projectId = parseInt(projectId, 10)
assert.array(hashes, 'bad hashes: not array')
hashes.forEach(function (hash) {
@@ -57,7 +57,7 @@ async function findBlobs(projectId, hashes) {
* Return metadata for all blobs in the given project
*/
async function getProjectBlobs(projectId) {
- assert.postgresId(projectId, `bad projectId ${projectId}`)
+ assert.postgresId(projectId, 'bad projectId')
projectId = parseInt(projectId, 10)
const records = await knex('project_blobs')
@@ -103,7 +103,7 @@ async function getProjectBlobsBatch(projectIds) {
* Add a blob's metadata to the blobs table after it has been uploaded.
*/
async function insertBlob(projectId, blob) {
- assert.postgresId(projectId, `bad projectId ${projectId}`)
+ assert.postgresId(projectId, 'bad projectId')
projectId = parseInt(projectId, 10)
await knex('project_blobs')
@@ -116,7 +116,7 @@ async function insertBlob(projectId, blob) {
* Deletes all blobs for a given project
*/
async function deleteBlobs(projectId) {
- assert.postgresId(projectId, `bad projectId ${projectId}`)
+ assert.postgresId(projectId, 'bad projectId')
projectId = parseInt(projectId, 10)
await knex('project_blobs').where('project_id', projectId).delete()
diff --git a/services/history-v1/storage/lib/chunk_store/errors.js b/services/history-v1/storage/lib/chunk_store/errors.js
index 5f0eba6aac..75b830f9a0 100644
--- a/services/history-v1/storage/lib/chunk_store/errors.js
+++ b/services/history-v1/storage/lib/chunk_store/errors.js
@@ -1,7 +1,15 @@
const OError = require('@overleaf/o-error')
class ChunkVersionConflictError extends OError {}
+class BaseVersionConflictError extends OError {}
+class JobNotFoundError extends OError {}
+class JobNotReadyError extends OError {}
+class VersionOutOfBoundsError extends OError {}
module.exports = {
ChunkVersionConflictError,
+ BaseVersionConflictError,
+ JobNotFoundError,
+ JobNotReadyError,
+ VersionOutOfBoundsError,
}
diff --git a/services/history-v1/storage/lib/chunk_store/index.js b/services/history-v1/storage/lib/chunk_store/index.js
index 9ccf948820..f387b68d90 100644
--- a/services/history-v1/storage/lib/chunk_store/index.js
+++ b/services/history-v1/storage/lib/chunk_store/index.js
@@ -1,3 +1,5 @@
+// @ts-check
+
'use strict'
/**
@@ -30,7 +32,15 @@ const { BlobStore } = require('../blob_store')
const { historyStore } = require('../history_store')
const mongoBackend = require('./mongo')
const postgresBackend = require('./postgres')
-const { ChunkVersionConflictError } = require('./errors')
+const redisBackend = require('./redis')
+const {
+ ChunkVersionConflictError,
+ VersionOutOfBoundsError,
+} = require('./errors')
+
+/**
+ * @import { Change } from 'overleaf-editor-core'
+ */
const DEFAULT_DELETE_BATCH_SIZE = parseInt(config.get('maxDeleteKeys'), 10)
const DEFAULT_DELETE_TIMEOUT_SECS = 3000 // 50 minutes
@@ -82,48 +92,119 @@ async function lazyLoadHistoryFiles(history, batchBlobStore) {
* Load the latest Chunk stored for a project, including blob metadata.
*
* @param {string} projectId
- * @return {Promise.<Chunk>}
+ * @param {Object} [opts]
+ * @param {boolean} [opts.readOnly]
+ * @return {Promise<{id: string, startVersion: number, endVersion: number, endTimestamp: Date}>}
*/
-async function loadLatest(projectId) {
+async function getLatestChunkMetadata(projectId, opts) {
assert.projectId(projectId, 'bad projectId')
const backend = getBackend(projectId)
- const blobStore = new BlobStore(projectId)
- const batchBlobStore = new BatchBlobStore(blobStore)
- const chunkRecord = await backend.getLatestChunk(projectId)
- if (chunkRecord == null) {
+ const chunkMetadata = await backend.getLatestChunk(projectId, opts)
+ if (chunkMetadata == null) {
throw new Chunk.NotFoundError(projectId)
}
+ return chunkMetadata
+}
- const rawHistory = await historyStore.loadRaw(projectId, chunkRecord.id)
+/**
+ * Load the latest Chunk stored for a project, including blob metadata.
+ *
+ * @param {string} projectId
+ * @param {object} [opts]
+ * @param {boolean} [opts.persistedOnly] - only include persisted changes
+ * @return {Promise<Chunk>}
+ */
+async function loadLatest(projectId, opts = {}) {
+ const chunkMetadata = await getLatestChunkMetadata(projectId)
+ const rawHistory = await historyStore.loadRaw(projectId, chunkMetadata.id)
const history = History.fromRaw(rawHistory)
+
+ if (!opts.persistedOnly) {
+ const nonPersistedChanges = await getChunkExtension(
+ projectId,
+ chunkMetadata.endVersion
+ )
+ history.pushChanges(nonPersistedChanges)
+ }
+
+ const blobStore = new BlobStore(projectId)
+ const batchBlobStore = new BatchBlobStore(blobStore)
await lazyLoadHistoryFiles(history, batchBlobStore)
- return new Chunk(history, chunkRecord.startVersion)
+ return new Chunk(history, chunkMetadata.startVersion)
}
/**
* Load the chunk that contains the given version, including blob metadata.
+ *
+ * @param {string} projectId
+ * @param {number} version
+ * @param {object} [opts]
+ * @param {boolean} [opts.persistedOnly] - only include persisted changes
+ * @param {boolean} [opts.preferNewer] - If the version is at the boundary of
+ * two chunks, return the newer chunk.
*/
-async function loadAtVersion(projectId, version) {
+async function loadAtVersion(projectId, version, opts = {}) {
assert.projectId(projectId, 'bad projectId')
assert.integer(version, 'bad version')
const backend = getBackend(projectId)
const blobStore = new BlobStore(projectId)
const batchBlobStore = new BatchBlobStore(blobStore)
+ const latestChunkMetadata = await getLatestChunkMetadata(projectId)
- const chunkRecord = await backend.getChunkForVersion(projectId, version)
+ // When loading a chunk for a version there are three cases to consider:
+ // 1. If `persistedOnly` is true, we always use the requested version
+ // to fetch the chunk.
+ // 2. If `persistedOnly` is false and the requested version is in the
+ // persisted chunk version range, we use the requested version.
+ // 3. If `persistedOnly` is false and the requested version is ahead of
+ // the persisted chunk versions, we fetch the latest chunk and see if
+ // the non-persisted changes include the requested version.
+ const targetChunkVersion = opts.persistedOnly
+ ? version
+ : Math.min(latestChunkMetadata.endVersion, version)
+
+ const chunkRecord = await backend.getChunkForVersion(
+ projectId,
+ targetChunkVersion,
+ {
+ preferNewer: opts.preferNewer,
+ }
+ )
const rawHistory = await historyStore.loadRaw(projectId, chunkRecord.id)
const history = History.fromRaw(rawHistory)
+ const startVersion = chunkRecord.endVersion - history.countChanges()
+
+ if (!opts.persistedOnly) {
+ // Try to extend the chunk with any non-persisted changes that
+ // follow the chunk's end version.
+ const nonPersistedChanges = await getChunkExtension(
+ projectId,
+ chunkRecord.endVersion
+ )
+ history.pushChanges(nonPersistedChanges)
+
+ // Check that the changes do actually contain the requested version
+ if (version > chunkRecord.endVersion + nonPersistedChanges.length) {
+ throw new Chunk.VersionNotFoundError(projectId, version)
+ }
+ }
+
await lazyLoadHistoryFiles(history, batchBlobStore)
- return new Chunk(history, chunkRecord.endVersion - history.countChanges())
+ return new Chunk(history, startVersion)
}
/**
* Load the chunk that contains the version that was current at the given
* timestamp, including blob metadata.
+ *
+ * @param {string} projectId
+ * @param {Date} timestamp
+ * @param {object} [opts]
+ * @param {boolean} [opts.persistedOnly] - only include persisted changes
*/
-async function loadAtTimestamp(projectId, timestamp) {
+async function loadAtTimestamp(projectId, timestamp, opts = {}) {
assert.projectId(projectId, 'bad projectId')
assert.date(timestamp, 'bad timestamp')
@@ -134,24 +215,58 @@ async function loadAtTimestamp(projectId, timestamp) {
const chunkRecord = await backend.getChunkForTimestamp(projectId, timestamp)
const rawHistory = await historyStore.loadRaw(projectId, chunkRecord.id)
const history = History.fromRaw(rawHistory)
+ const startVersion = chunkRecord.endVersion - history.countChanges()
+
+ if (!opts.persistedOnly) {
+ const nonPersistedChanges = await getChunkExtension(
+ projectId,
+ chunkRecord.endVersion
+ )
+ history.pushChanges(nonPersistedChanges)
+ }
+
await lazyLoadHistoryFiles(history, batchBlobStore)
- return new Chunk(history, chunkRecord.endVersion - history.countChanges())
+ return new Chunk(history, startVersion)
}
/**
* Store the chunk and insert corresponding records in the database.
*
- * @param {number} projectId
+ * @param {string} projectId
* @param {Chunk} chunk
- * @return {Promise.} for the chunkId of the inserted chunk
+ * @param {Date} [earliestChangeTimestamp]
*/
-async function create(projectId, chunk) {
+async function create(projectId, chunk, earliestChangeTimestamp) {
assert.projectId(projectId, 'bad projectId')
assert.instance(chunk, Chunk, 'bad chunk')
+ assert.maybe.date(earliestChangeTimestamp, 'bad timestamp')
const backend = getBackend(projectId)
+ const chunkStart = chunk.getStartVersion()
+
+ const opts = {}
+ if (chunkStart > 0) {
+ const oldChunk = await backend.getChunkForVersion(projectId, chunkStart)
+
+ if (oldChunk.endVersion !== chunkStart) {
+ throw new ChunkVersionConflictError(
+ 'unexpected end version on chunk to be updated',
+ {
+ projectId,
+ expectedVersion: chunkStart,
+ actualVersion: oldChunk.endVersion,
+ }
+ )
+ }
+
+ opts.oldChunkId = oldChunk.id
+ }
+ if (earliestChangeTimestamp != null) {
+ opts.earliestChangeTimestamp = earliestChangeTimestamp
+ }
+
const chunkId = await uploadChunk(projectId, chunk)
- await backend.confirmCreate(projectId, chunk, chunkId)
+ await backend.confirmCreate(projectId, chunk, chunkId, opts)
}
/**
@@ -180,29 +295,67 @@ async function uploadChunk(projectId, chunk) {
* Extend the project's history by replacing the latest chunk with a new
* chunk.
*
- * @param {number} projectId
- * @param {number} oldEndVersion
+ * @param {string} projectId
* @param {Chunk} newChunk
+ * @param {Date} [earliestChangeTimestamp]
* @return {Promise}
*/
-async function update(projectId, oldEndVersion, newChunk) {
+async function update(projectId, newChunk, earliestChangeTimestamp) {
assert.projectId(projectId, 'bad projectId')
- assert.integer(oldEndVersion, 'bad oldEndVersion')
assert.instance(newChunk, Chunk, 'bad newChunk')
+ assert.maybe.date(earliestChangeTimestamp, 'bad timestamp')
const backend = getBackend(projectId)
- const oldChunkId = await getChunkIdForVersion(projectId, oldEndVersion)
+ const oldChunk = await backend.getChunkForVersion(
+ projectId,
+ newChunk.getStartVersion(),
+ { preferNewer: true }
+ )
+
+ if (oldChunk.startVersion !== newChunk.getStartVersion()) {
+ throw new ChunkVersionConflictError(
+ 'unexpected start version on chunk to be updated',
+ {
+ projectId,
+ expectedVersion: newChunk.getStartVersion(),
+ actualVersion: oldChunk.startVersion,
+ }
+ )
+ }
+
+ if (oldChunk.endVersion > newChunk.getEndVersion()) {
+ throw new ChunkVersionConflictError(
+ 'chunk update would decrease chunk version',
+ {
+ projectId,
+ currentVersion: oldChunk.endVersion,
+ newVersion: newChunk.getEndVersion(),
+ }
+ )
+ }
+
const newChunkId = await uploadChunk(projectId, newChunk)
- await backend.confirmUpdate(projectId, oldChunkId, newChunk, newChunkId)
+ const opts = {}
+ if (earliestChangeTimestamp != null) {
+ opts.earliestChangeTimestamp = earliestChangeTimestamp
+ }
+
+ await backend.confirmUpdate(
+ projectId,
+ oldChunk.id,
+ newChunk,
+ newChunkId,
+ opts
+ )
}
/**
* Find the chunk ID for a given version of a project.
*
- * @param {number} projectId
+ * @param {string} projectId
* @param {number} version
- * @return {Promise.<number>}
+ * @return {Promise.<string>}
*/
async function getChunkIdForVersion(projectId, version) {
const backend = getBackend(projectId)
@@ -210,6 +363,19 @@ async function getChunkIdForVersion(projectId, version) {
return chunkRecord.id
}
+/**
+ * Find the chunk metadata for a given version of a project.
+ *
+ * @param {string} projectId
+ * @param {number} version
+ * @return {Promise.<{id: string|number, startVersion: number, endVersion: number}>}
+ */
+async function getChunkMetadataForVersion(projectId, version) {
+ const backend = getBackend(projectId)
+ const chunkRecord = await backend.getChunkForVersion(projectId, version)
+ return chunkRecord
+}
+
/**
* Get all of a project's chunk ids
*/
@@ -219,6 +385,62 @@ async function getProjectChunkIds(projectId) {
return chunkIds
}
+/**
+ * Get all of a project's chunks directly
+ */
+async function getProjectChunks(projectId) {
+ const backend = getBackend(projectId)
+ const chunkIds = await backend.getProjectChunks(projectId)
+ return chunkIds
+}
+
+/**
+ * Load the chunk for a given chunk record, including blob metadata.
+ */
+async function loadByChunkRecord(projectId, chunkRecord) {
+ const blobStore = new BlobStore(projectId)
+ const batchBlobStore = new BatchBlobStore(blobStore)
+ const { raw: rawHistory, buffer: chunkBuffer } =
+ await historyStore.loadRawWithBuffer(projectId, chunkRecord.id)
+ const history = History.fromRaw(rawHistory)
+ await lazyLoadHistoryFiles(history, batchBlobStore)
+ return {
+ chunk: new Chunk(history, chunkRecord.endVersion - history.countChanges()),
+ chunkBuffer,
+ }
+}
+
+/**
+ * Asynchronously retrieves project chunks starting from a specific version.
+ *
+ * This generator function yields chunk records for a given project starting from the specified version (inclusive).
+ * It continues to fetch and yield subsequent chunk records until the end version of the latest chunk metadata is reached.
+ * If you want to fetch all the chunks *after* a version V, call this function with V+1.
+ *
+ * @param {string} projectId - The ID of the project.
+ * @param {number} version - The starting version to retrieve chunks from.
+ * @returns {AsyncGenerator} An async generator that yields chunk records.
+ */
+async function* getProjectChunksFromVersion(projectId, version) {
+ const backend = getBackend(projectId)
+ const latestChunkMetadata = await getLatestChunkMetadata(projectId)
+ if (!latestChunkMetadata || version > latestChunkMetadata.endVersion) {
+ return
+ }
+ let chunkRecord = await backend.getChunkForVersion(projectId, version)
+ while (chunkRecord != null) {
+ yield chunkRecord
+ if (chunkRecord.endVersion >= latestChunkMetadata.endVersion) {
+ break
+ } else {
+ chunkRecord = await backend.getChunkForVersion(
+ projectId,
+ chunkRecord.endVersion + 1
+ )
+ }
+ }
+}
+
/**
* Delete the given chunk from the database.
*
@@ -242,10 +464,14 @@ async function deleteProjectChunks(projectId) {
* Delete a given number of old chunks from both the database
* and from object storage.
*
- * @param {number} count - number of chunks to delete
- * @param {number} minAgeSecs - how many seconds ago must chunks have been
- * deleted
- * @return {Promise}
+ * @param {object} options
+ * @param {number} [options.batchSize] - number of chunks to delete in each
+ * batch
+ * @param {number} [options.maxBatches] - maximum number of batches to process
+ * @param {number} [options.minAgeSecs] - minimum age of chunks to delete
+ * @param {number} [options.timeout] - maximum time to spend deleting chunks
+ *
+ * @return {Promise} number of chunks deleted
*/
async function deleteOldChunks(options = {}) {
const batchSize = options.batchSize ?? DEFAULT_DELETE_BATCH_SIZE
@@ -308,6 +534,31 @@ function getBackend(projectId) {
}
}
+/**
+ * Gets non-persisted changes that could extend a chunk
+ *
+ * @param {string} projectId
+ * @param {number} chunkEndVersion - end version of the chunk to extend
+ *
+ * @return {Promise}
+ */
+async function getChunkExtension(projectId, chunkEndVersion) {
+ try {
+ const changes = await redisBackend.getNonPersistedChanges(
+ projectId,
+ chunkEndVersion
+ )
+ return changes
+ } catch (err) {
+ if (err instanceof VersionOutOfBoundsError) {
+ // If we can't extend the chunk, simply return an empty list
+ return []
+ } else {
+ throw err
+ }
+ }
+}
+
class AlreadyInitialized extends OError {
constructor(projectId) {
super('Project is already initialized', { projectId })
@@ -318,13 +569,18 @@ module.exports = {
getBackend,
initializeProject,
loadLatest,
+ getLatestChunkMetadata,
loadAtVersion,
loadAtTimestamp,
+ loadByChunkRecord,
create,
update,
destroy,
getChunkIdForVersion,
+ getChunkMetadataForVersion,
getProjectChunkIds,
+ getProjectChunks,
+ getProjectChunksFromVersion,
deleteProjectChunks,
deleteOldChunks,
AlreadyInitialized,
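
A sketch contrasting the new loader options (project id and version are placeholders):

const chunkStore = require('./chunk_store')

async function loadViews(projectId, version) {
  // Only what has been persisted to object storage.
  const persisted = await chunkStore.loadAtVersion(projectId, version, {
    persistedOnly: true,
  })

  // Default behaviour: the persisted chunk extended with any non-persisted
  // changes buffered in Redis; if the buffer cannot extend this chunk, the
  // extension quietly falls back to an empty list.
  const extended = await chunkStore.loadAtVersion(projectId, version)

  // If `version` sits on a chunk boundary, prefer the newer chunk.
  const newer = await chunkStore.loadAtVersion(projectId, version, {
    preferNewer: true,
  })

  return { persisted, extended, newer }
}
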
diff --git a/services/history-v1/storage/lib/chunk_store/mongo.js b/services/history-v1/storage/lib/chunk_store/mongo.js
index f56131a25b..49020c6be4 100644
--- a/services/history-v1/storage/lib/chunk_store/mongo.js
+++ b/services/history-v1/storage/lib/chunk_store/mongo.js
@@ -1,6 +1,9 @@
-const { ObjectId } = require('mongodb')
+// @ts-check
+
+const { ObjectId, ReadPreference, MongoError } = require('mongodb')
const { Chunk } = require('overleaf-editor-core')
const OError = require('@overleaf/o-error')
+const config = require('config')
const assert = require('../assert')
const mongodb = require('../mongodb')
const { ChunkVersionConflictError } = require('./errors')
@@ -8,14 +11,30 @@ const { ChunkVersionConflictError } = require('./errors')
const DUPLICATE_KEY_ERROR_CODE = 11000
/**
- * Get the latest chunk's metadata from the database
+ * @import { ClientSession } from 'mongodb'
*/
-async function getLatestChunk(projectId) {
+
+/**
+ * Get the latest chunk's metadata from the database
+ * @param {string} projectId
+ * @param {Object} [opts]
+ * @param {boolean} [opts.readOnly]
+ */
+async function getLatestChunk(projectId, opts = {}) {
assert.mongoId(projectId, 'bad projectId')
+ const { readOnly = false } = opts
const record = await mongodb.chunks.findOne(
- { projectId: new ObjectId(projectId), state: 'active' },
- { sort: { startVersion: -1 } }
+ {
+ projectId: new ObjectId(projectId),
+ state: { $in: ['active', 'closed'] },
+ },
+ {
+ sort: { startVersion: -1 },
+ readPreference: readOnly
+ ? ReadPreference.secondaryPreferred
+ : ReadPreference.primary,
+ }
)
if (record == null) {
return null
@@ -25,19 +44,25 @@ async function getLatestChunk(projectId) {
/**
* Get the metadata for the chunk that contains the given version.
+ *
+ * @param {string} projectId
+ * @param {number} version
+ * @param {object} [opts]
+ * @param {boolean} [opts.preferNewer] - If the version is at the boundary of
+ * two chunks, return the newer chunk.
*/
-async function getChunkForVersion(projectId, version) {
+async function getChunkForVersion(projectId, version, opts = {}) {
assert.mongoId(projectId, 'bad projectId')
assert.integer(version, 'bad version')
const record = await mongodb.chunks.findOne(
{
projectId: new ObjectId(projectId),
- state: 'active',
+ state: { $in: ['active', 'closed'] },
startVersion: { $lte: version },
endVersion: { $gte: version },
},
- { sort: { startVersion: 1 } }
+ { sort: { startVersion: opts.preferNewer ? -1 : 1 } }
)
if (record == null) {
throw new Chunk.VersionNotFoundError(projectId, version)
@@ -45,6 +70,35 @@ async function getChunkForVersion(projectId, version) {
return chunkFromRecord(record)
}
+/**
+ * Get the metadata for the project's first chunk (start version 0) whose
+ * end timestamp is at or before the given timestamp.
+ */
+async function getFirstChunkBeforeTimestamp(projectId, timestamp) {
+ assert.mongoId(projectId, 'bad projectId')
+ assert.date(timestamp, 'bad timestamp')
+
+ const recordActive = await getChunkForVersion(projectId, 0)
+ if (recordActive && recordActive.endTimestamp <= timestamp) {
+ return recordActive
+ }
+
+ // fallback to deleted chunk
+ const recordDeleted = await mongodb.chunks.findOne(
+ {
+ projectId: new ObjectId(projectId),
+ state: 'deleted',
+ startVersion: 0,
+ updatedAt: { $lte: timestamp }, // indexed for state=deleted
+ endTimestamp: { $lte: timestamp },
+ },
+ { sort: { updatedAt: -1 } }
+ )
+ if (recordDeleted) {
+ return chunkFromRecord(recordDeleted)
+ }
+ throw new Chunk.BeforeTimestampNotFoundError(projectId, timestamp)
+}
+
/**
* Get the metadata for the chunk that contains the version that was current at
* the given timestamp.
@@ -56,7 +110,7 @@ async function getChunkForTimestamp(projectId, timestamp) {
const record = await mongodb.chunks.findOne(
{
projectId: new ObjectId(projectId),
- state: 'active',
+ state: { $in: ['active', 'closed'] },
endTimestamp: { $gte: timestamp },
},
// We use the index on the startVersion for sorting records. This assumes
@@ -77,6 +131,39 @@ async function getChunkForTimestamp(projectId, timestamp) {
return chunkFromRecord(record)
}
+/**
+ * Get the metadata for the chunk that contains the version that was current before
+ * the given timestamp.
+ */
+async function getLastActiveChunkBeforeTimestamp(projectId, timestamp) {
+ assert.mongoId(projectId, 'bad projectId')
+ assert.date(timestamp, 'bad timestamp')
+
+ const record = await mongodb.chunks.findOne(
+ {
+ projectId: new ObjectId(projectId),
+ state: { $in: ['active', 'closed'] },
+ $or: [
+ {
+ endTimestamp: {
+ $lte: timestamp,
+ },
+ },
+ {
+ endTimestamp: null,
+ },
+ ],
+ },
+ // We use the index on the startVersion for sorting records. This assumes
+ // that timestamps go up with each version.
+ { sort: { startVersion: -1 } }
+ )
+ if (record == null) {
+ throw new Chunk.BeforeTimestampNotFoundError(projectId, timestamp)
+ }
+ return chunkFromRecord(record)
+}
+
/**
* Get all of a project's chunk ids
*/
@@ -84,12 +171,33 @@ async function getProjectChunkIds(projectId) {
assert.mongoId(projectId, 'bad projectId')
const cursor = mongodb.chunks.find(
- { projectId: new ObjectId(projectId), state: 'active' },
+ {
+ projectId: new ObjectId(projectId),
+ state: { $in: ['active', 'closed'] },
+ },
{ projection: { _id: 1 } }
)
return await cursor.map(record => record._id).toArray()
}
+/**
+ * Get all of a project's chunks directly
+ */
+async function getProjectChunks(projectId) {
+ assert.mongoId(projectId, 'bad projectId')
+
+ const cursor = mongodb.chunks
+ .find(
+ {
+ projectId: new ObjectId(projectId),
+ state: { $in: ['active', 'closed'] },
+ },
+ { projection: { state: 0 } }
+ )
+ .sort({ startVersion: 1 })
+ return await cursor.map(chunkFromRecord).toArray()
+}
+
/**
* Insert a pending chunk before sending it to object storage.
*/
@@ -112,10 +220,141 @@ async function insertPendingChunk(projectId, chunk) {
/**
* Record that a new chunk was created.
+ *
+ * @param {string} projectId
+ * @param {Chunk} chunk
+ * @param {string} chunkId
+ * @param {object} opts
+ * @param {Date} [opts.earliestChangeTimestamp]
+ * @param {string} [opts.oldChunkId]
*/
-async function confirmCreate(projectId, chunk, chunkId, mongoOpts = {}) {
+async function confirmCreate(projectId, chunk, chunkId, opts = {}) {
+ assert.mongoId(projectId, 'bad projectId')
+ assert.instance(chunk, Chunk, 'bad newChunk')
+ assert.mongoId(chunkId, 'bad newChunkId')
+
+ await mongodb.client.withSession(async session => {
+ await session.withTransaction(async () => {
+ if (opts.oldChunkId != null) {
+ await closeChunk(projectId, opts.oldChunkId, { session })
+ }
+
+ await activateChunk(projectId, chunkId, { session })
+
+ await updateProjectRecord(
+ projectId,
+ chunk,
+ opts.earliestChangeTimestamp,
+ { session }
+ )
+ })
+ })
+}
+
+/**
+ * Write the metadata to the project record
+ */
+async function updateProjectRecord(
+ projectId,
+ chunk,
+ earliestChangeTimestamp,
+ mongoOpts = {}
+) {
+ if (!config.has('backupStore')) {
+ return
+ }
+ // record the end version against the project
+ await mongodb.projects.updateOne(
+ {
+ 'overleaf.history.id': projectId, // string for Object ids, number for postgres ids
+ },
+ {
+ // always store the latest end version and timestamp for the chunk
+ $max: {
+ 'overleaf.history.currentEndVersion': chunk.getEndVersion(),
+ 'overleaf.history.currentEndTimestamp': chunk.getEndTimestamp(),
+ 'overleaf.history.updatedAt': new Date(),
+ },
+ // store the first pending change timestamp for the chunk, this will
+ // be cleared every time a backup is completed.
+ $min: {
+ 'overleaf.backup.pendingChangeAt':
+ earliestChangeTimestamp || chunk.getEndTimestamp() || new Date(),
+ },
+ },
+ mongoOpts
+ )
+}
+
+/**
+ * @param {number} historyId
+ * @return {Promise}
+ */
+async function lookupMongoProjectIdFromHistoryId(historyId) {
+ const project = await mongodb.projects.findOne(
+ // string for Object ids, number for postgres ids
+ { 'overleaf.history.id': historyId },
+ { projection: { _id: 1 } }
+ )
+ if (!project) {
+ // should not happen: We flush before allowing a project to be soft-deleted.
+ throw new OError('mongo project not found by history id', { historyId })
+ }
+ return project._id.toString()
+}
+
+async function resolveHistoryIdToMongoProjectId(projectId) {
+ return projectId
+}
+
+/**
+ * Record that a chunk was replaced by a new one.
+ *
+ * @param {string} projectId
+ * @param {string} oldChunkId
+ * @param {Chunk} newChunk
+ * @param {string} newChunkId
+ * @param {object} [opts]
+ * @param {Date} [opts.earliestChangeTimestamp]
+ */
+async function confirmUpdate(
+ projectId,
+ oldChunkId,
+ newChunk,
+ newChunkId,
+ opts = {}
+) {
+ assert.mongoId(projectId, 'bad projectId')
+ assert.mongoId(oldChunkId, 'bad oldChunkId')
+ assert.instance(newChunk, Chunk, 'bad newChunk')
+ assert.mongoId(newChunkId, 'bad newChunkId')
+
+ await mongodb.client.withSession(async session => {
+ await session.withTransaction(async () => {
+ await deleteActiveChunk(projectId, oldChunkId, { session })
+
+ await activateChunk(projectId, newChunkId, { session })
+
+ await updateProjectRecord(
+ projectId,
+ newChunk,
+ opts.earliestChangeTimestamp,
+ { session }
+ )
+ })
+ })
+}
+
+/**
+ * Activate a pending chunk
+ *
+ * @param {string} projectId
+ * @param {string} chunkId
+ * @param {object} [opts]
+ * @param {ClientSession} [opts.session]
+ */
+async function activateChunk(projectId, chunkId, opts = {}) {
assert.mongoId(projectId, 'bad projectId')
- assert.instance(chunk, Chunk, 'bad chunk')
assert.mongoId(chunkId, 'bad chunkId')
let result
@@ -127,10 +366,10 @@ async function confirmCreate(projectId, chunk, chunkId, mongoOpts = {}) {
state: 'pending',
},
{ $set: { state: 'active', updatedAt: new Date() } },
- mongoOpts
+ opts
)
} catch (err) {
- if (err.code === DUPLICATE_KEY_ERROR_CODE) {
+ if (err instanceof MongoError && err.code === DUPLICATE_KEY_ERROR_CODE) {
throw new ChunkVersionConflictError('chunk start version is not unique', {
projectId,
chunkId,
@@ -145,30 +384,70 @@ async function confirmCreate(projectId, chunk, chunkId, mongoOpts = {}) {
}
/**
- * Record that a chunk was replaced by a new one.
+ * Close a chunk
+ *
+ * A closed chunk is one that can't be extended anymore.
+ *
+ * @param {string} projectId
+ * @param {string} chunkId
+ * @param {object} [opts]
+ * @param {ClientSession} [opts.session]
*/
-async function confirmUpdate(projectId, oldChunkId, newChunk, newChunkId) {
- assert.mongoId(projectId, 'bad projectId')
- assert.mongoId(oldChunkId, 'bad oldChunkId')
- assert.instance(newChunk, Chunk, 'bad newChunk')
- assert.mongoId(newChunkId, 'bad newChunkId')
+async function closeChunk(projectId, chunkId, opts = {}) {
+ const result = await mongodb.chunks.updateOne(
+ {
+ _id: new ObjectId(chunkId),
+ projectId: new ObjectId(projectId),
+ state: 'active',
+ },
+ { $set: { state: 'closed' } },
+ opts
+ )
- const session = mongodb.client.startSession()
- try {
- await session.withTransaction(async () => {
- await deleteChunk(projectId, oldChunkId, { session })
- await confirmCreate(projectId, newChunk, newChunkId, { session })
+ if (result.matchedCount === 0) {
+ throw new ChunkVersionConflictError('unable to close chunk', {
+ projectId,
+ chunkId,
+ })
+ }
+}
+
+/**
+ * Delete an active chunk
+ *
+ * This is used to delete chunks that are in the process of being extended. It
+ * will refuse to delete chunks that are already closed and can therefore not be
+ * extended.
+ *
+ * @param {string} projectId
+ * @param {string} chunkId
+ * @param {object} [opts]
+ * @param {ClientSession} [opts.session]
+ */
+async function deleteActiveChunk(projectId, chunkId, opts = {}) {
+ const updateResult = await mongodb.chunks.updateOne(
+ {
+ _id: new ObjectId(chunkId),
+ projectId: new ObjectId(projectId),
+ state: 'active',
+ },
+ { $set: { state: 'deleted', updatedAt: new Date() } },
+ opts
+ )
+
+ if (updateResult.matchedCount === 0) {
+ throw new ChunkVersionConflictError('unable to delete active chunk', {
+ projectId,
+ chunkId,
})
- } finally {
- await session.endSession()
}
}
/**
* Delete a chunk.
*
- * @param {number} projectId
- * @param {number} chunkId
+ * @param {string} projectId
+ * @param {string} chunkId
* @return {Promise}
*/
async function deleteChunk(projectId, chunkId, mongoOpts = {}) {
@@ -189,7 +468,10 @@ async function deleteProjectChunks(projectId) {
assert.mongoId(projectId, 'bad projectId')
await mongodb.chunks.updateMany(
- { projectId: new ObjectId(projectId), state: 'active' },
+ {
+ projectId: new ObjectId(projectId),
+ state: { $in: ['active', 'closed'] },
+ },
{ $set: { state: 'deleted', updatedAt: new Date() } }
)
}
@@ -252,19 +534,26 @@ function chunkFromRecord(record) {
id: record._id.toString(),
startVersion: record.startVersion,
endVersion: record.endVersion,
+ endTimestamp: record.endTimestamp,
}
}
module.exports = {
getLatestChunk,
+ getFirstChunkBeforeTimestamp,
+ getLastActiveChunkBeforeTimestamp,
getChunkForVersion,
getChunkForTimestamp,
getProjectChunkIds,
+ getProjectChunks,
insertPendingChunk,
confirmCreate,
confirmUpdate,
+ updateProjectRecord,
deleteChunk,
deleteProjectChunks,
getOldChunksBatch,
deleteOldChunks,
+ lookupMongoProjectIdFromHistoryId,
+ resolveHistoryIdToMongoProjectId,
}
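
A sketch of the chunk lifecycle as driven by the reworked confirmCreate; the ids are placeholders and the pending chunk is assumed to have been written with insertPendingChunk and uploaded already:

const mongoBackend = require('./chunk_store/mongo')

async function finishChunkCreation(projectId, newChunk, newChunkId, oldChunkId) {
  // Within a single Mongo transaction confirmCreate will:
  //   1. close the previous chunk ('active' -> 'closed') so it can no longer
  //      be extended,
  //   2. activate the pending chunk ('pending' -> 'active'),
  //   3. update the project record (currentEndVersion/currentEndTimestamp and
  //      the backup's pendingChangeAt marker).
  await mongoBackend.confirmCreate(projectId, newChunk, newChunkId, {
    oldChunkId,
    earliestChangeTimestamp: new Date(),
  })
}
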
diff --git a/services/history-v1/storage/lib/chunk_store/postgres.js b/services/history-v1/storage/lib/chunk_store/postgres.js
index f6eead7354..8906db38e1 100644
--- a/services/history-v1/storage/lib/chunk_store/postgres.js
+++ b/services/history-v1/storage/lib/chunk_store/postgres.js
@@ -1,19 +1,33 @@
+// @ts-check
+
const { Chunk } = require('overleaf-editor-core')
const assert = require('../assert')
const knex = require('../knex')
+const knexReadOnly = require('../knex_read_only')
const { ChunkVersionConflictError } = require('./errors')
+const {
+ updateProjectRecord,
+ lookupMongoProjectIdFromHistoryId,
+} = require('./mongo')
const DUPLICATE_KEY_ERROR_CODE = '23505'
/**
- * Get the latest chunk's metadata from the database
+ * @import { Knex } from 'knex'
*/
-async function getLatestChunk(projectId) {
- projectId = parseInt(projectId, 10)
- assert.integer(projectId, 'bad projectId')
- const record = await knex('chunks')
- .where('doc_id', projectId)
+/**
+ * Get the latest chunk's metadata from the database
+ * @param {string} projectId
+ * @param {Object} [opts]
+ * @param {boolean} [opts.readOnly]
+ */
+async function getLatestChunk(projectId, opts = {}) {
+ assert.postgresId(projectId, 'bad projectId')
+ const { readOnly = false } = opts
+
+ const record = await (readOnly ? knexReadOnly : knex)('chunks')
+ .where('doc_id', parseInt(projectId, 10))
.orderBy('end_version', 'desc')
.first()
if (record == null) {
@@ -24,15 +38,21 @@ async function getLatestChunk(projectId) {
/**
* Get the metadata for the chunk that contains the given version.
+ *
+ * @param {string} projectId
+ * @param {number} version
+ * @param {object} [opts]
+ * @param {boolean} [opts.preferNewer] - If the version is at the boundary of
+ * two chunks, return the newer chunk.
*/
-async function getChunkForVersion(projectId, version) {
- projectId = parseInt(projectId, 10)
- assert.integer(projectId, 'bad projectId')
+async function getChunkForVersion(projectId, version, opts = {}) {
+ assert.postgresId(projectId, 'bad projectId')
const record = await knex('chunks')
- .where('doc_id', projectId)
+ .where('doc_id', parseInt(projectId, 10))
+ .where('start_version', '<=', version)
.where('end_version', '>=', version)
- .orderBy('end_version')
+ .orderBy('end_version', opts.preferNewer ? 'desc' : 'asc')
.first()
if (!record) {
throw new Chunk.VersionNotFoundError(projectId, version)
@@ -40,13 +60,73 @@ async function getChunkForVersion(projectId, version) {
return chunkFromRecord(record)
}
+/**
+ * Get the metadata for the project's first chunk (start version 0) whose
+ * end timestamp is at or before the given timestamp.
+ *
+ * @param {string} projectId
+ * @param {Date} timestamp
+ */
+async function getFirstChunkBeforeTimestamp(projectId, timestamp) {
+ assert.date(timestamp, 'bad timestamp')
+
+ const recordActive = await getChunkForVersion(projectId, 0)
+
+ // projectId must be valid if getChunkForVersion did not throw
+ if (recordActive && recordActive.endTimestamp <= timestamp) {
+ return recordActive
+ }
+
+ // fallback to deleted chunk
+ const recordDeleted = await knex('old_chunks')
+ .where('doc_id', parseInt(projectId, 10))
+ .where('start_version', '=', 0)
+ .where('end_timestamp', '<=', timestamp)
+ .orderBy('end_version', 'desc')
+ .first()
+ if (recordDeleted) {
+ return chunkFromRecord(recordDeleted)
+ }
+ throw new Chunk.BeforeTimestampNotFoundError(projectId, timestamp)
+}
+
/**
* Get the metadata for the chunk that contains the version that was current at
* the given timestamp.
+ *
+ * @param {string} projectId
+ * @param {Date} timestamp
+ */
+async function getLastActiveChunkBeforeTimestamp(projectId, timestamp) {
+ assert.date(timestamp, 'bad timestamp')
+ assert.postgresId(projectId, 'bad projectId')
+
+ const query = knex('chunks')
+ .where('doc_id', parseInt(projectId, 10))
+ .where(function () {
+ this.where('end_timestamp', '<=', timestamp).orWhere(
+ 'end_timestamp',
+ null
+ )
+ })
+ .orderBy('end_version', 'desc', 'last')
+
+ const record = await query.first()
+
+ if (!record) {
+ throw new Chunk.BeforeTimestampNotFoundError(projectId, timestamp)
+ }
+ return chunkFromRecord(record)
+}
+
+/**
+ * Get the metadata for the chunk that contains the version that was current before
+ * the given timestamp.
+ *
+ * @param {string} projectId
+ * @param {Date} timestamp
*/
async function getChunkForTimestamp(projectId, timestamp) {
- projectId = parseInt(projectId, 10)
- assert.integer(projectId, 'bad projectId')
+ assert.postgresId(projectId, 'bad projectId')
// This query will find the latest chunk after the timestamp (query orders
// in reverse chronological order), OR the latest chunk
@@ -59,11 +139,11 @@ async function getChunkForTimestamp(projectId, timestamp) {
'WHERE doc_id = ? ' +
'ORDER BY end_version desc LIMIT 1' +
')',
- [timestamp, projectId]
+ [timestamp, parseInt(projectId, 10)]
)
const record = await knex('chunks')
- .where('doc_id', projectId)
+ .where('doc_id', parseInt(projectId, 10))
.where(whereAfterEndTimestampOrLatestChunk)
.orderBy('end_version')
.first()
@@ -78,29 +158,50 @@ async function getChunkForTimestamp(projectId, timestamp) {
*/
function chunkFromRecord(record) {
return {
- id: record.id,
+ id: record.id.toString(),
startVersion: record.start_version,
endVersion: record.end_version,
+ endTimestamp: record.end_timestamp,
}
}
/**
* Get all of a project's chunk ids
+ *
+ * @param {string} projectId
*/
async function getProjectChunkIds(projectId) {
- projectId = parseInt(projectId, 10)
- assert.integer(projectId, 'bad projectId')
+ assert.postgresId(projectId, 'bad projectId')
- const records = await knex('chunks').select('id').where('doc_id', projectId)
+ const records = await knex('chunks')
+ .select('id')
+ .where('doc_id', parseInt(projectId, 10))
return records.map(record => record.id)
}
+/**
+ * Get all of a project's chunks directly
+ *
+ * @param {string} projectId
+ */
+async function getProjectChunks(projectId) {
+ assert.postgresId(projectId, 'bad projectId')
+
+ const records = await knex('chunks')
+ .select()
+ .where('doc_id', parseInt(projectId, 10))
+ .orderBy('end_version')
+ return records.map(chunkFromRecord)
+}
+
/**
* Insert a pending chunk before sending it to object storage.
+ *
+ * @param {string} projectId
+ * @param {Chunk} chunk
*/
async function insertPendingChunk(projectId, chunk) {
- projectId = parseInt(projectId, 10)
- assert.integer(projectId, 'bad projectId')
+ assert.postgresId(projectId, 'bad projectId')
const result = await knex.first(
knex.raw("nextval('chunks_id_seq'::regclass)::integer as chunkid")
@@ -108,67 +209,119 @@ async function insertPendingChunk(projectId, chunk) {
const chunkId = result.chunkid
await knex('pending_chunks').insert({
id: chunkId,
- doc_id: projectId,
+ doc_id: parseInt(projectId, 10),
end_version: chunk.getEndVersion(),
start_version: chunk.getStartVersion(),
end_timestamp: chunk.getEndTimestamp(),
})
- return chunkId
+ return chunkId.toString()
}
/**
* Record that a new chunk was created.
+ *
+ * @param {string} projectId
+ * @param {Chunk} chunk
+ * @param {string} chunkId
+ * @param {object} opts
+ * @param {Date} [opts.earliestChangeTimestamp]
+ * @param {string} [opts.oldChunkId]
*/
-async function confirmCreate(projectId, chunk, chunkId) {
- projectId = parseInt(projectId, 10)
- assert.integer(projectId, 'bad projectId')
+async function confirmCreate(projectId, chunk, chunkId, opts = {}) {
+ assert.postgresId(projectId, 'bad projectId')
await knex.transaction(async tx => {
+ if (opts.oldChunkId != null) {
+ await _assertChunkIsNotClosed(tx, projectId, opts.oldChunkId)
+ await _closeChunk(tx, projectId, opts.oldChunkId)
+ }
await Promise.all([
_deletePendingChunk(tx, projectId, chunkId),
_insertChunk(tx, projectId, chunk, chunkId),
])
+ await updateProjectRecord(
+ // The history id in Mongo is an integer for Postgres projects
+ parseInt(projectId, 10),
+ chunk,
+ opts.earliestChangeTimestamp
+ )
})
}
/**
* Record that a chunk was replaced by a new one.
+ *
+ * @param {string} projectId
+ * @param {string} oldChunkId
+ * @param {Chunk} newChunk
+ * @param {string} newChunkId
*/
-async function confirmUpdate(projectId, oldChunkId, newChunk, newChunkId) {
- projectId = parseInt(projectId, 10)
- assert.integer(projectId, 'bad projectId')
+async function confirmUpdate(
+ projectId,
+ oldChunkId,
+ newChunk,
+ newChunkId,
+ opts = {}
+) {
+ assert.postgresId(projectId, 'bad projectId')
await knex.transaction(async tx => {
+ await _assertChunkIsNotClosed(tx, projectId, oldChunkId)
await _deleteChunks(tx, { doc_id: projectId, id: oldChunkId })
await Promise.all([
_deletePendingChunk(tx, projectId, newChunkId),
_insertChunk(tx, projectId, newChunk, newChunkId),
])
+ await updateProjectRecord(
+ // The history id in Mongo is an integer for Postgres projects
+ parseInt(projectId, 10),
+ newChunk,
+ opts.earliestChangeTimestamp
+ )
})
}
+/**
+ * Delete a pending chunk
+ *
+ * @param {Knex} tx
+ * @param {string} projectId
+ * @param {string} chunkId
+ */
async function _deletePendingChunk(tx, projectId, chunkId) {
await tx('pending_chunks')
.where({
- doc_id: projectId,
- id: chunkId,
+ doc_id: parseInt(projectId, 10),
+ id: parseInt(chunkId, 10),
})
.del()
}
+/**
+ * Adds an active chunk
+ *
+ * @param {Knex} tx
+ * @param {string} projectId
+ * @param {Chunk} chunk
+ * @param {string} chunkId
+ */
async function _insertChunk(tx, projectId, chunk, chunkId) {
const startVersion = chunk.getStartVersion()
const endVersion = chunk.getEndVersion()
try {
await tx('chunks').insert({
- id: chunkId,
- doc_id: projectId,
+ id: parseInt(chunkId, 10),
+ doc_id: parseInt(projectId, 10),
start_version: startVersion,
end_version: endVersion,
end_timestamp: chunk.getEndTimestamp(),
})
} catch (err) {
- if (err.code === DUPLICATE_KEY_ERROR_CODE) {
+ if (
+ err instanceof Error &&
+ 'code' in err &&
+ err.code === DUPLICATE_KEY_ERROR_CODE
+ ) {
throw new ChunkVersionConflictError(
'chunk start or end version is not unique',
{ projectId, chunkId, startVersion, endVersion }
@@ -178,35 +331,92 @@ async function _insertChunk(tx, projectId, chunk, chunkId) {
}
}
+/**
+ * Check that a chunk is not closed
+ *
+ * This is used to synchronize chunk creations and extensions.
+ *
+ * @param {Knex} tx
+ * @param {string} projectId
+ * @param {string} chunkId
+ */
+async function _assertChunkIsNotClosed(tx, projectId, chunkId) {
+ const record = await tx('chunks')
+ .forUpdate()
+ .select('closed')
+ .where('doc_id', parseInt(projectId, 10))
+ .where('id', parseInt(chunkId, 10))
+ .first()
+ if (!record) {
+ throw new ChunkVersionConflictError('unable to close chunk: not found', {
+ projectId,
+ chunkId,
+ })
+ }
+ if (record.closed) {
+ throw new ChunkVersionConflictError(
+ 'unable to close chunk: already closed',
+ {
+ projectId,
+ chunkId,
+ }
+ )
+ }
+}
+
+/**
+ * Close a chunk
+ *
+ * A closed chunk can no longer be extended.
+ *
+ * @param {Knex} tx
+ * @param {string} projectId
+ * @param {string} chunkId
+ */
+async function _closeChunk(tx, projectId, chunkId) {
+ await tx('chunks')
+ .update({ closed: true })
+ .where('doc_id', parseInt(projectId, 10))
+ .where('id', parseInt(chunkId, 10))
+}
+
/**
* Delete a chunk.
*
- * @param {number} projectId
- * @param {number} chunkId
- * @return {Promise}
+ * @param {string} projectId
+ * @param {string} chunkId
*/
async function deleteChunk(projectId, chunkId) {
- projectId = parseInt(projectId, 10)
- assert.integer(projectId, 'bad projectId')
+ assert.postgresId(projectId, 'bad projectId')
assert.integer(chunkId, 'bad chunkId')
- await _deleteChunks(knex, { doc_id: projectId, id: chunkId })
+ await _deleteChunks(knex, {
+ doc_id: parseInt(projectId, 10),
+ id: parseInt(chunkId, 10),
+ })
}
/**
* Delete all of a project's chunks
+ *
+ * @param {string} projectId
*/
async function deleteProjectChunks(projectId) {
- projectId = parseInt(projectId, 10)
- assert.integer(projectId, 'bad projectId')
+ assert.postgresId(projectId, 'bad projectId')
await knex.transaction(async tx => {
- await _deleteChunks(knex, { doc_id: projectId })
+ await _deleteChunks(knex, { doc_id: parseInt(projectId, 10) })
})
}
+/**
+ * Delete many chunks
+ *
+ * @param {Knex} tx
+ * @param {any} whereClause
+ */
async function _deleteChunks(tx, whereClause) {
- const rows = await tx('chunks').returning('*').where(whereClause).del()
+ const rows = await tx('chunks').where(whereClause).del().returning('*')
if (rows.length === 0) {
return
}
@@ -224,6 +434,9 @@ async function _deleteChunks(tx, whereClause) {
/**
* Get a batch of old chunks for deletion
+ *
+ * @param {number} count
+ * @param {number} minAgeSecs
*/
async function getOldChunksBatch(count, minAgeSecs) {
const maxDeletedAt = new Date(Date.now() - minAgeSecs * 1000)
@@ -234,15 +447,22 @@ async function getOldChunksBatch(count, minAgeSecs) {
.limit(count)
return records.map(oldChunk => ({
projectId: oldChunk.doc_id.toString(),
- chunkId: oldChunk.chunk_id,
+ chunkId: oldChunk.chunk_id.toString(),
}))
}
/**
* Delete a batch of old chunks from the database
+ *
+ * @param {string[]} chunkIds
*/
async function deleteOldChunks(chunkIds) {
- await knex('old_chunks').whereIn('chunk_id', chunkIds).del()
+ await knex('old_chunks')
+ .whereIn(
+ 'chunk_id',
+ chunkIds.map(id => parseInt(id, 10))
+ )
+ .del()
}
/**
@@ -255,11 +475,18 @@ async function generateProjectId() {
return record.doc_id.toString()
}
+async function resolveHistoryIdToMongoProjectId(projectId) {
+ return await lookupMongoProjectIdFromHistoryId(parseInt(projectId, 10))
+}
+
module.exports = {
getLatestChunk,
+ getFirstChunkBeforeTimestamp,
+ getLastActiveChunkBeforeTimestamp,
getChunkForVersion,
getChunkForTimestamp,
getProjectChunkIds,
+ getProjectChunks,
insertPendingChunk,
confirmCreate,
confirmUpdate,
@@ -268,4 +495,5 @@ module.exports = {
getOldChunksBatch,
deleteOldChunks,
generateProjectId,
+ resolveHistoryIdToMongoProjectId,
}
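
For orientation, here is a minimal sketch of how a caller might use the reworked Postgres backend above. The require path, project id and logging are illustrative assumptions rather than part of the patch, and the sketch assumes the project already has at least one chunk.

    // Sketch: string ids in, chunk metadata out.
    const chunkStorePostgres = require('./chunk_store/postgres')

    async function showLatestChunk(projectId) {
      // Read the latest chunk metadata, optionally from the read-only replica.
      const latest = await chunkStorePostgres.getLatestChunk(projectId, {
        readOnly: true,
      })
      console.log(latest.id, latest.startVersion, latest.endVersion)

      // A version on a chunk boundary matches two chunks; preferNewer picks the
      // chunk that starts at that version instead of the one that ends there.
      const chunk = await chunkStorePostgres.getChunkForVersion(
        projectId,
        latest.startVersion,
        { preferNewer: true }
      )
      return chunk
    }

    showLatestChunk('1234').catch(console.error)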
diff --git a/services/history-v1/storage/lib/chunk_store/redis.js b/services/history-v1/storage/lib/chunk_store/redis.js
new file mode 100644
index 0000000000..b8a79b498d
--- /dev/null
+++ b/services/history-v1/storage/lib/chunk_store/redis.js
@@ -0,0 +1,854 @@
+// @ts-check
+
+const metrics = require('@overleaf/metrics')
+const OError = require('@overleaf/o-error')
+const { Change, Snapshot } = require('overleaf-editor-core')
+const redis = require('../redis')
+const rclient = redis.rclientHistory
+const {
+ BaseVersionConflictError,
+ JobNotFoundError,
+ JobNotReadyError,
+ VersionOutOfBoundsError,
+} = require('./errors')
+
+const MAX_PERSISTED_CHANGES = 100 // Maximum number of persisted changes to keep in the buffer for clients that need to catch up.
+const PROJECT_TTL_MS = 3600 * 1000 // Amount of time a project can stay inactive before it gets expired
+const MAX_PERSIST_DELAY_MS = 300 * 1000 // Maximum amount of time before a change is persisted
+const RETRY_DELAY_MS = 120 * 1000 // Time before a claimed job is considered stale and a worker can retry it.
+
+const keySchema = {
+ head({ projectId }) {
+ return `head:{${projectId}}`
+ },
+ headVersion({ projectId }) {
+ return `head-version:{${projectId}}`
+ },
+ persistedVersion({ projectId }) {
+ return `persisted-version:{${projectId}}`
+ },
+ expireTime({ projectId }) {
+ return `expire-time:{${projectId}}`
+ },
+ persistTime({ projectId }) {
+ return `persist-time:{${projectId}}`
+ },
+ changes({ projectId }) {
+ return `changes:{${projectId}}`
+ },
+}
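+
+// Note: every key embeds the project id inside `{...}` so that, in Redis
+// Cluster, all of a project's keys hash to the same slot. The multi-key Lua
+// scripts defined below rely on this to read and write a project atomically.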
+
+rclient.defineCommand('get_head_snapshot', {
+ numberOfKeys: 2,
+ lua: `
+ local headSnapshotKey = KEYS[1]
+ local headVersionKey = KEYS[2]
+
+ -- Check if the head version exists. If not, consider it a cache miss.
+ local version = redis.call('GET', headVersionKey)
+ if not version then
+ return nil
+ end
+
+ -- Retrieve the snapshot value
+ local snapshot = redis.call('GET', headSnapshotKey)
+ return {snapshot, version}
+ `,
+})
+
+/**
+ * Retrieves the head snapshot from Redis storage
+ * @param {string} projectId - The unique identifier of the project
+ * @returns {Promise<{version: number, snapshot: Snapshot}|null>} A Promise that resolves to an object containing the version and Snapshot,
+ * or null on a cache miss
+ * @throws {Error} If Redis operations fail
+ */
+async function getHeadSnapshot(projectId) {
+ try {
+ const result = await rclient.get_head_snapshot(
+ keySchema.head({ projectId }),
+ keySchema.headVersion({ projectId })
+ )
+ if (!result) {
+ metrics.inc('chunk_store.redis.get_head_snapshot', 1, {
+ status: 'cache-miss',
+ })
+ return null // cache-miss
+ }
+ const snapshot = Snapshot.fromRaw(JSON.parse(result[0]))
+ const version = parseInt(result[1], 10)
+ metrics.inc('chunk_store.redis.get_head_snapshot', 1, {
+ status: 'success',
+ })
+ return { version, snapshot }
+ } catch (err) {
+ metrics.inc('chunk_store.redis.get_head_snapshot', 1, { status: 'error' })
+ throw err
+ }
+}
+
+rclient.defineCommand('queue_changes', {
+ numberOfKeys: 5,
+ lua: `
+ local headSnapshotKey = KEYS[1]
+ local headVersionKey = KEYS[2]
+ local changesKey = KEYS[3]
+ local expireTimeKey = KEYS[4]
+ local persistTimeKey = KEYS[5]
+
+ local baseVersion = tonumber(ARGV[1])
+ local head = ARGV[2]
+ local persistTime = tonumber(ARGV[3])
+ local expireTime = tonumber(ARGV[4])
+ local onlyIfExists = ARGV[5]
+ local changesIndex = 6 -- Changes start here
+
+ local headVersion = tonumber(redis.call('GET', headVersionKey))
+
+ -- Check if updates should only be queued if the project already exists (used for gradual rollouts)
+ if not headVersion and onlyIfExists == 'true' then
+ return 'ignore'
+ end
+
+ -- Check that the supplied baseVersion matches the head version
+ -- If headVersion is nil, it means the project does not exist yet and will be created.
+ if headVersion and headVersion ~= baseVersion then
+ return 'conflict'
+ end
+
+ -- Check if there are any changes to queue
+ if #ARGV < changesIndex then
+ return 'no_changes_provided'
+ end
+
+ -- Store the changes
+ -- RPUSH changesKey change1 change2 ...
+ redis.call('RPUSH', changesKey, unpack(ARGV, changesIndex, #ARGV))
+
+ -- Update head snapshot only if changes were successfully pushed
+ redis.call('SET', headSnapshotKey, head)
+
+ -- Update the head version
+ local numChanges = #ARGV - changesIndex + 1
+ local newHeadVersion = baseVersion + numChanges
+ redis.call('SET', headVersionKey, newHeadVersion)
+
+ -- Update the persist time if the new time is sooner
+ local currentPersistTime = tonumber(redis.call('GET', persistTimeKey))
+ if not currentPersistTime or persistTime < currentPersistTime then
+ redis.call('SET', persistTimeKey, persistTime)
+ end
+
+ -- Update the expire time
+ redis.call('SET', expireTimeKey, expireTime)
+
+ return 'ok'
+ `,
+})
+
+/**
+ * Atomically queues changes to the project history in Redis if the baseVersion matches.
+ * Updates head snapshot, version, persist time, and expire time.
+ *
+ * @param {string} projectId - The project identifier.
+ * @param {Snapshot} headSnapshot - The new head snapshot after applying changes.
+ * @param {number} baseVersion - The expected current head version.
+ * @param {Change[]} changes - An array of Change objects to queue.
+ * @param {object} [opts]
+ * @param {number} [opts.persistTime] - Timestamp (ms since epoch) when the
+ * oldest change in the buffer should be persisted.
+ * @param {number} [opts.expireTime] - Timestamp (ms since epoch) when the
+ * project buffer should expire if inactive.
+ * @param {boolean} [opts.onlyIfExists] - If true, only queue changes if the
+ * project already exists in Redis, otherwise ignore.
+ * @returns {Promise} Resolves on success to either 'ok' or 'ignore'.
+ * @throws {BaseVersionConflictError} If the baseVersion does not match the current head version in Redis.
+ * @throws {Error} If changes array is empty or if Redis operations fail.
+ */
+async function queueChanges(
+ projectId,
+ headSnapshot,
+ baseVersion,
+ changes,
+ opts = {}
+) {
+ if (!changes || changes.length === 0) {
+ throw new Error('Cannot queue empty changes array')
+ }
+
+ const persistTime = opts.persistTime ?? Date.now() + MAX_PERSIST_DELAY_MS
+ const expireTime = opts.expireTime ?? Date.now() + PROJECT_TTL_MS
+ const onlyIfExists = Boolean(opts.onlyIfExists)
+
+ try {
+ const keys = [
+ keySchema.head({ projectId }),
+ keySchema.headVersion({ projectId }),
+ keySchema.changes({ projectId }),
+ keySchema.expireTime({ projectId }),
+ keySchema.persistTime({ projectId }),
+ ]
+
+ const args = [
+ baseVersion.toString(),
+ JSON.stringify(headSnapshot.toRaw()),
+ persistTime.toString(),
+ expireTime.toString(),
+ onlyIfExists.toString(), // Only queue changes if the snapshot already exists
+ ...changes.map(change => JSON.stringify(change.toRaw())), // Serialize changes
+ ]
+
+ const status = await rclient.queue_changes(keys, args)
+ metrics.inc('chunk_store.redis.queue_changes', 1, { status })
+ if (status === 'ok') {
+ return status
+ }
+ if (status === 'ignore') {
+ return status // skip changes when project does not exist and onlyIfExists is true
+ }
+ if (status === 'conflict') {
+ throw new BaseVersionConflictError('base version mismatch', {
+ projectId,
+ baseVersion,
+ })
+ } else {
+ throw new Error(`unexpected result queuing changes: ${status}`)
+ }
+ } catch (err) {
+ if (err instanceof BaseVersionConflictError) {
+ // Re-throw conflict errors directly
+ throw err
+ }
+ metrics.inc('chunk_store.redis.queue_changes', 1, { status: 'error' })
+ throw err
+ }
+}
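
To make the queueing contract concrete, here is a minimal caller sketch. The wrapper function, its arguments and the rebase policy are assumptions; it only relies on queueChanges and BaseVersionConflictError as exported by this module and its errors file, and the require paths assume the sketch sits next to this module.

    const { queueChanges } = require('./redis') // this module
    const { BaseVersionConflictError } = require('./errors')

    // `changes` are already-built Change objects and `newHeadSnapshot` is the
    // snapshot obtained by applying them on top of `baseVersion`.
    async function appendChanges(projectId, newHeadSnapshot, baseVersion, changes) {
      try {
        // 'ok' when queued, 'ignore' when the project is not buffered yet.
        return await queueChanges(projectId, newHeadSnapshot, baseVersion, changes, {
          onlyIfExists: true,
        })
      } catch (err) {
        if (err instanceof BaseVersionConflictError) {
          // Someone else advanced the head first: reload it and rebase the changes.
          return 'conflict'
        }
        throw err
      }
    }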
+
+rclient.defineCommand('get_state', {
+ numberOfKeys: 6, // Number of keys defined in keySchema
+ lua: `
+ local headSnapshotKey = KEYS[1]
+ local headVersionKey = KEYS[2]
+ local persistedVersionKey = KEYS[3]
+ local expireTimeKey = KEYS[4]
+ local persistTimeKey = KEYS[5]
+ local changesKey = KEYS[6]
+
+ local headSnapshot = redis.call('GET', headSnapshotKey)
+ local headVersion = redis.call('GET', headVersionKey)
+ local persistedVersion = redis.call('GET', persistedVersionKey)
+ local expireTime = redis.call('GET', expireTimeKey)
+ local persistTime = redis.call('GET', persistTimeKey)
+ local changes = redis.call('LRANGE', changesKey, 0, -1) -- Get all changes in the list
+
+ return {headSnapshot, headVersion, persistedVersion, expireTime, persistTime, changes}
+ `,
+})
+
+/**
+ * Retrieves the entire state associated with a project from Redis atomically.
+ * @param {string} projectId - The unique identifier of the project.
+ * @returns {Promise} A Promise that resolves to an object containing the project state;
+ * fields that are not present in Redis are returned as null.
+ * @throws {Error} If Redis operations fail.
+ */
+async function getState(projectId) {
+ const keys = [
+ keySchema.head({ projectId }),
+ keySchema.headVersion({ projectId }),
+ keySchema.persistedVersion({ projectId }),
+ keySchema.expireTime({ projectId }),
+ keySchema.persistTime({ projectId }),
+ keySchema.changes({ projectId }),
+ ]
+
+ // Pass keys individually, not as an array
+ const result = await rclient.get_state(...keys)
+
+ const [
+ rawHeadSnapshot,
+ rawHeadVersion,
+ rawPersistedVersion,
+ rawExpireTime,
+ rawPersistTime,
+ rawChanges,
+ ] = result
+
+ // Safely parse values, providing defaults or nulls if necessary
+ const headSnapshot = rawHeadSnapshot
+ ? JSON.parse(rawHeadSnapshot)
+ : rawHeadSnapshot
+ const headVersion = rawHeadVersion ? parseInt(rawHeadVersion, 10) : null // Should always exist if result is not null
+ const persistedVersion = rawPersistedVersion
+ ? parseInt(rawPersistedVersion, 10)
+ : null
+ const expireTime = rawExpireTime ? parseInt(rawExpireTime, 10) : null
+ const persistTime = rawPersistTime ? parseInt(rawPersistTime, 10) : null
+ const changes = rawChanges ? rawChanges.map(JSON.parse) : null
+
+ return {
+ headSnapshot,
+ headVersion,
+ persistedVersion,
+ expireTime,
+ persistTime,
+ changes,
+ }
+}
+
+rclient.defineCommand('get_changes_since_version', {
+ numberOfKeys: 2,
+ lua: `
+ local headVersionKey = KEYS[1]
+ local changesKey = KEYS[2]
+
+ local requestedVersion = tonumber(ARGV[1])
+
+ -- Check if head version exists
+ local headVersion = tonumber(redis.call('GET', headVersionKey))
+ if not headVersion then
+ return {'not_found'}
+ end
+
+ -- If requested version equals head version, return empty array
+ if requestedVersion == headVersion then
+ return {'ok', {}}
+ end
+
+ -- If requested version is greater than head version, return error
+ if requestedVersion > headVersion then
+ return {'out_of_bounds'}
+ end
+
+ -- Get length of changes list
+ local changesCount = redis.call('LLEN', changesKey)
+
+ -- Check if requested version is too old (changes already removed from buffer)
+ if requestedVersion < (headVersion - changesCount) then
+ return {'out_of_bounds'}
+ end
+
+ -- Calculate the starting index, using negative indexing to count backwards
+ -- from the end of the list
+ local startIndex = requestedVersion - headVersion
+
+ -- Get changes using LRANGE
+ local changes = redis.call('LRANGE', changesKey, startIndex, -1)
+
+ return {'ok', changes}
+ `,
+})
+
+/**
+ * Retrieves changes since a specific version for a project from Redis.
+ *
+ * @param {string} projectId - The unique identifier of the project.
+ * @param {number} version - The version number to retrieve changes since.
+ * @returns {Promise<{status: string, changes?: Array}>} A Promise that resolves to an object containing:
+ * - status: 'ok', 'not_found', or 'out_of_bounds'
+ * - changes: Array of raw change objects (only when status is 'ok')
+ * @throws {Error} If Redis operations fail.
+ */
+async function getChangesSinceVersion(projectId, version) {
+ try {
+ const keys = [
+ keySchema.headVersion({ projectId }),
+ keySchema.changes({ projectId }),
+ ]
+
+ const args = [version.toString()]
+
+ const result = await rclient.get_changes_since_version(keys, args)
+ const status = result[0]
+
+ if (status === 'ok') {
+ // If status is OK, parse the changes
+ const changes = result[1]
+ ? result[1].map(rawChange =>
+ typeof rawChange === 'string' ? JSON.parse(rawChange) : rawChange
+ )
+ : []
+
+ metrics.inc('chunk_store.redis.get_changes_since_version', 1, {
+ status: 'success',
+ })
+ return { status, changes }
+ } else {
+ // For other statuses, just return the status
+ metrics.inc('chunk_store.redis.get_changes_since_version', 1, {
+ status,
+ })
+ return { status }
+ }
+ } catch (err) {
+ metrics.inc('chunk_store.redis.get_changes_since_version', 1, {
+ status: 'error',
+ })
+ throw err
+ }
+}
+
+rclient.defineCommand('get_non_persisted_changes', {
+ numberOfKeys: 3,
+ lua: `
+ local headVersionKey = KEYS[1]
+ local persistedVersionKey = KEYS[2]
+ local changesKey = KEYS[3]
+ local baseVersion = tonumber(ARGV[1])
+ local maxChanges = tonumber(ARGV[2])
+
+ -- Check if head version exists
+ local headVersion = tonumber(redis.call('GET', headVersionKey))
+ if not headVersion then
+ return {'not_found'}
+ end
+
+ -- Check if persisted version exists
+ local persistedVersion = tonumber(redis.call('GET', persistedVersionKey))
+ if not persistedVersion then
+ local changesCount = tonumber(redis.call('LLEN', changesKey))
+ persistedVersion = headVersion - changesCount
+ end
+
+ if baseVersion < persistedVersion or baseVersion > headVersion then
+ return {'out_of_bounds'}
+ elseif baseVersion == headVersion then
+ return {'ok', {}}
+ else
+ local numChanges = headVersion - baseVersion
+
+ local endIndex, expectedChanges
+ if maxChanges > 0 and maxChanges < numChanges then
+ -- return only the first maxChanges changes; the end index is inclusive
+ endIndex = -numChanges + maxChanges - 1
+ expectedChanges = maxChanges
+ else
+ endIndex = -1
+ expectedChanges = numChanges
+ end
+
+ local changes = redis.call('LRANGE', changesKey, -numChanges, endIndex)
+
+ if #changes < expectedChanges then
+ -- We didn't get as many changes as we expected
+ return {'out_of_bounds'}
+ end
+
+ return {'ok', changes}
+ end
+ `,
+})
+
+/**
+ * Retrieves non-persisted changes for a project from Redis.
+ *
+ * @param {string} projectId - The unique identifier of the project.
+ * @param {number} baseVersion - The version on top of which the changes should
+ * be applied.
+ * @param {object} [opts]
+ * @param {number} [opts.maxChanges] - The maximum number of changes to return.
+ * Defaults to 0, meaning no limit.
+ * @returns {Promise} Changes that can be applied on top of
+ * baseVersion. An empty array means that the project doesn't have
+ * changes to persist. A VersionOutOfBoundsError is thrown if the
+ * non-persisted changes can't be applied to the given base version.
+ *
+ * @throws {Error} If Redis operations fail.
+ */
+async function getNonPersistedChanges(projectId, baseVersion, opts = {}) {
+ let result
+ try {
+ result = await rclient.get_non_persisted_changes(
+ keySchema.headVersion({ projectId }),
+ keySchema.persistedVersion({ projectId }),
+ keySchema.changes({ projectId }),
+ baseVersion.toString(),
+ opts.maxChanges ?? 0
+ )
+ } catch (err) {
+ metrics.inc('chunk_store.redis.get_non_persisted_changes', 1, {
+ status: 'error',
+ })
+ throw err
+ }
+
+ const status = result[0]
+ metrics.inc('chunk_store.redis.get_non_persisted_changes', 1, {
+ status,
+ })
+
+ if (status === 'ok') {
+ return result[1].map(json => Change.fromRaw(JSON.parse(json)))
+ } else if (status === 'not_found') {
+ return []
+ } else if (status === 'out_of_bounds') {
+ throw new VersionOutOfBoundsError(
+ "Non-persisted changes can't be applied to base version",
+ { projectId, baseVersion }
+ )
+ } else {
+ throw new OError('unknown status for get_non_persisted_changes', {
+ projectId,
+ baseVersion,
+ status,
+ })
+ }
+}
+
+rclient.defineCommand('set_persisted_version', {
+ numberOfKeys: 4,
+ lua: `
+ local headVersionKey = KEYS[1]
+ local persistedVersionKey = KEYS[2]
+ local persistTimeKey = KEYS[3]
+ local changesKey = KEYS[4]
+
+ local newPersistedVersion = tonumber(ARGV[1])
+ local maxPersistedChanges = tonumber(ARGV[2])
+
+ -- Check if head version exists
+ local headVersion = tonumber(redis.call('GET', headVersionKey))
+ if not headVersion then
+ return 'not_found'
+ end
+
+ -- Get current persisted version
+ local persistedVersion = tonumber(redis.call('GET', persistedVersionKey))
+ if persistedVersion and persistedVersion > newPersistedVersion then
+ return 'too_low'
+ end
+
+ -- Refuse to set a persisted version that is higher than the head version
+ if newPersistedVersion > headVersion then
+ return 'too_high'
+ end
+
+ -- Set the persisted version
+ redis.call('SET', persistedVersionKey, newPersistedVersion)
+
+ -- Clear the persist time if the persisted version now matches the head version
+ if newPersistedVersion == headVersion then
+ redis.call('DEL', persistTimeKey)
+ end
+
+ -- Calculate the starting index, to keep only maxPersistedChanges beyond the persisted version
+ -- Using negative indexing to count backwards from the end of the list
+ local startIndex = newPersistedVersion - headVersion - maxPersistedChanges
+
+ -- Trim the changes list to keep only the specified number of changes beyond persisted version
+ if startIndex < 0 then
+ redis.call('LTRIM', changesKey, startIndex, -1)
+ end
+
+ return 'ok'
+ `,
+})
+
+/**
+ * Sets the persisted version for a project in Redis and trims the changes list.
+ *
+ * @param {string} projectId - The unique identifier of the project.
+ * @param {number} persistedVersion - The version number to set as persisted.
+ * @returns {Promise} A Promise that resolves to 'ok', 'not_found' or 'too_low'.
+ * @throws {Error} If Redis operations fail.
+ */
+async function setPersistedVersion(projectId, persistedVersion) {
+ try {
+ const keys = [
+ keySchema.headVersion({ projectId }),
+ keySchema.persistedVersion({ projectId }),
+ keySchema.persistTime({ projectId }),
+ keySchema.changes({ projectId }),
+ ]
+
+ const args = [persistedVersion.toString(), MAX_PERSISTED_CHANGES.toString()]
+
+ const status = await rclient.set_persisted_version(keys, args)
+
+ metrics.inc('chunk_store.redis.set_persisted_version', 1, {
+ status,
+ })
+
+ if (status === 'too_high') {
+ throw new VersionOutOfBoundsError(
+ 'Persisted version cannot be higher than head version',
+ { projectId, persistedVersion }
+ )
+ }
+
+ return status
+ } catch (err) {
+ metrics.inc('chunk_store.redis.set_persisted_version', 1, {
+ status: 'error',
+ })
+ throw err
+ }
+}
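
A worked example of the trimming arithmetic in the script above, with illustrative numbers:

    // Suppose headVersion = 150, newPersistedVersion = 150 and
    // MAX_PERSISTED_CHANGES = 100.
    const startIndex = 150 - 150 - 100 // -100, i.e. keep the last 100 entries
    // LTRIM changes:{projectId} -100 -1 then drops everything older, so clients
    // that are at most 100 versions behind can still catch up from Redis.
    console.log(startIndex)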
+
+rclient.defineCommand('hard_delete_project', {
+ numberOfKeys: 6,
+ lua: `
+ local headKey = KEYS[1]
+ local headVersionKey = KEYS[2]
+ local persistedVersionKey = KEYS[3]
+ local expireTimeKey = KEYS[4]
+ local persistTimeKey = KEYS[5]
+ local changesKey = KEYS[6]
+ -- Delete all keys associated with the project
+ redis.call('DEL',
+ headKey,
+ headVersionKey,
+ persistedVersionKey,
+ expireTimeKey,
+ persistTimeKey,
+ changesKey
+ )
+ return 'ok'
+ `,
+})
+
+/** Hard delete a project from Redis by removing all keys associated with it.
+ * This is only to be used when a project is **permanently** deleted.
+ * DO NOT USE THIS FOR ANY OTHER PURPOSES AS IT WILL REMOVE NON-PERSISTED CHANGES.
+ * @param {string} projectId - The unique identifier of the project to delete.
+ * @returns {Promise} A Promise that resolves to 'ok' on success.
+ * @throws {Error} If Redis operations fail.
+ */
+async function hardDeleteProject(projectId) {
+ try {
+ const status = await rclient.hard_delete_project(
+ keySchema.head({ projectId }),
+ keySchema.headVersion({ projectId }),
+ keySchema.persistedVersion({ projectId }),
+ keySchema.expireTime({ projectId }),
+ keySchema.persistTime({ projectId }),
+ keySchema.changes({ projectId })
+ )
+ metrics.inc('chunk_store.redis.hard_delete_project', 1, { status })
+ return status
+ } catch (err) {
+ metrics.inc('chunk_store.redis.hard_delete_project', 1, { status: 'error' })
+ throw err
+ }
+}
+
+rclient.defineCommand('set_expire_time', {
+ numberOfKeys: 2,
+ lua: `
+ local expireTimeKey = KEYS[1]
+ local headVersionKey = KEYS[2]
+ local expireTime = tonumber(ARGV[1])
+
+ -- Only set the expire time if the project is loaded in Redis
+ local headVersion = redis.call('GET', headVersionKey)
+ if headVersion then
+ redis.call('SET', expireTimeKey, expireTime)
+ end
+ `,
+})
+
+/**
+ * Sets the expire time for a project in Redis
+ *
+ * @param {string} projectId
+ * @param {number} expireTime - Timestamp (ms since epoch) when the project
+ * buffer should expire if inactive
+ */
+async function setExpireTime(projectId, expireTime) {
+ try {
+ await rclient.set_expire_time(
+ keySchema.expireTime({ projectId }),
+ keySchema.headVersion({ projectId }),
+ expireTime.toString()
+ )
+ metrics.inc('chunk_store.redis.set_expire_time', 1, { status: 'success' })
+ } catch (err) {
+ metrics.inc('chunk_store.redis.set_expire_time', 1, { status: 'error' })
+ throw err
+ }
+}
+
+rclient.defineCommand('expire_project', {
+ numberOfKeys: 6,
+ lua: `
+ local headKey = KEYS[1]
+ local headVersionKey = KEYS[2]
+ local changesKey = KEYS[3]
+ local persistedVersionKey = KEYS[4]
+ local persistTimeKey = KEYS[5]
+ local expireTimeKey = KEYS[6]
+
+ local headVersion = tonumber(redis.call('GET', headVersionKey))
+ if not headVersion then
+ return 'not-found'
+ end
+
+ local persistedVersion = tonumber(redis.call('GET', persistedVersionKey))
+ if not persistedVersion or persistedVersion ~= headVersion then
+ return 'not-persisted'
+ end
+
+ redis.call('DEL',
+ headKey,
+ headVersionKey,
+ changesKey,
+ persistedVersionKey,
+ persistTimeKey,
+ expireTimeKey
+ )
+ return 'success'
+ `,
+})
+
+async function expireProject(projectId) {
+ try {
+ const status = await rclient.expire_project(
+ keySchema.head({ projectId }),
+ keySchema.headVersion({ projectId }),
+ keySchema.changes({ projectId }),
+ keySchema.persistedVersion({ projectId }),
+ keySchema.persistTime({ projectId }),
+ keySchema.expireTime({ projectId })
+ )
+ metrics.inc('chunk_store.redis.expire_project', 1, {
+ status,
+ })
+ return status
+ } catch (err) {
+ metrics.inc('chunk_store.redis.expire_project', 1, {
+ status: 'error',
+ })
+ throw err
+ }
+}
+
+rclient.defineCommand('claim_job', {
+ numberOfKeys: 1,
+ lua: `
+ local jobTimeKey = KEYS[1]
+ local currentTime = tonumber(ARGV[1])
+ local retryDelay = tonumber(ARGV[2])
+
+ local jobTime = tonumber(redis.call('GET', jobTimeKey))
+ if not jobTime then
+ return {'no-job'}
+ end
+
+ local msUntilReady = jobTime - currentTime
+ if msUntilReady <= 0 then
+ local retryTime = currentTime + retryDelay
+ redis.call('SET', jobTimeKey, retryTime)
+ return {'ok', retryTime}
+ else
+ return {'wait', msUntilReady}
+ end
+ `,
+})
+
+rclient.defineCommand('close_job', {
+ numberOfKeys: 1,
+ lua: `
+ local jobTimeKey = KEYS[1]
+ local expectedJobTime = tonumber(ARGV[1])
+
+ local jobTime = tonumber(redis.call('GET', jobTimeKey))
+ if jobTime and jobTime == expectedJobTime then
+ redis.call('DEL', jobTimeKey)
+ end
+ `,
+})
+
+/**
+ * Claim an expire job
+ *
+ * @param {string} projectId
+ * @return {Promise}
+ */
+async function claimExpireJob(projectId) {
+ return await claimJob(keySchema.expireTime({ projectId }))
+}
+
+/**
+ * Claim a persist job
+ *
+ * @param {string} projectId
+ * @return {Promise}
+ */
+async function claimPersistJob(projectId) {
+ return await claimJob(keySchema.persistTime({ projectId }))
+}
+
+/**
+ * Claim a persist or expire job
+ *
+ * @param {string} jobKey - the Redis key containing the time at which the job
+ * is ready
+ * @return {Promise}
+ */
+async function claimJob(jobKey) {
+ let result, status
+ try {
+ result = await rclient.claim_job(jobKey, Date.now(), RETRY_DELAY_MS)
+ status = result[0]
+ metrics.inc('chunk_store.redis.claim_job', 1, { status })
+ } catch (err) {
+ metrics.inc('chunk_store.redis.claim_job', 1, { status: 'error' })
+ throw err
+ }
+
+ if (status === 'ok') {
+ return new Job(jobKey, parseInt(result[1], 10))
+ } else if (status === 'wait') {
+ throw new JobNotReadyError('job not ready', {
+ jobKey,
+ retryTime: result[1],
+ })
+ } else if (status === 'no-job') {
+ throw new JobNotFoundError('job not found', { jobKey })
+ } else {
+ throw new OError('unknown status for claim_job', { jobKey, status })
+ }
+}
+
+/**
+ * Handle for a claimed job
+ */
+class Job {
+ /**
+ * @param {string} redisKey
+ * @param {number} claimTimestamp
+ */
+ constructor(redisKey, claimTimestamp) {
+ this.redisKey = redisKey
+ this.claimTimestamp = claimTimestamp
+ }
+
+ async close() {
+ try {
+ await rclient.close_job(this.redisKey, this.claimTimestamp.toString())
+ metrics.inc('chunk_store.redis.close_job', 1, { status: 'success' })
+ } catch (err) {
+ metrics.inc('chunk_store.redis.close_job', 1, { status: 'error' })
+ throw err
+ }
+ }
+}
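
A sketch of how a background worker might use the job claiming helpers above. The worker wiring and the persist function are assumptions; only claimPersistJob, the error classes and Job.close() come from this module and its errors file.

    const { claimPersistJob } = require('./redis') // this module
    const { JobNotFoundError, JobNotReadyError } = require('./errors')

    async function runPersistJob(projectId, persistProject) {
      let job
      try {
        job = await claimPersistJob(projectId)
      } catch (err) {
        if (err instanceof JobNotReadyError) return // not due yet; try again later
        if (err instanceof JobNotFoundError) return // nothing queued for this project
        throw err
      }
      try {
        await persistProject(projectId)
      } finally {
        // Deletes the persist-time key only if it still holds this claim's retry
        // time, so a job rescheduled in the meantime is not lost.
        await job.close()
      }
    }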
+
+module.exports = {
+ getHeadSnapshot,
+ queueChanges,
+ getState,
+ getChangesSinceVersion,
+ getNonPersistedChanges,
+ setPersistedVersion,
+ hardDeleteProject,
+ setExpireTime,
+ expireProject,
+ claimExpireJob,
+ claimPersistJob,
+ MAX_PERSISTED_CHANGES,
+ MAX_PERSIST_DELAY_MS,
+ PROJECT_TTL_MS,
+ RETRY_DELAY_MS,
+ keySchema,
+}
diff --git a/services/history-v1/storage/lib/commit_changes.js b/services/history-v1/storage/lib/commit_changes.js
new file mode 100644
index 0000000000..5749e5fc0e
--- /dev/null
+++ b/services/history-v1/storage/lib/commit_changes.js
@@ -0,0 +1,159 @@
+// @ts-check
+
+'use strict'
+
+const metrics = require('@overleaf/metrics')
+const redisBackend = require('./chunk_store/redis')
+const logger = require('@overleaf/logger')
+const queueChanges = require('./queue_changes')
+const persistChanges = require('./persist_changes')
+const persistBuffer = require('./persist_buffer')
+
+/**
+ * @typedef {import('overleaf-editor-core').Change} Change
+ */
+
+/**
+ * Handle incoming changes by processing them according to the specified options.
+ * @param {string} projectId
+ * @param {Change[]} changes
+ * @param {Object} limits
+ * @param {number} endVersion
+ * @param {Object} options
+ * @param {number} [options.historyBufferLevel] - The history buffer level to use for processing changes.
+ * @param {Boolean} [options.forcePersistBuffer] - If true, forces the buffer to be persisted before any operation.
+ * @return {Promise.<Object?>}
+ */
+
+async function commitChanges(
+ projectId,
+ changes,
+ limits,
+ endVersion,
+ options = {}
+) {
+ const { historyBufferLevel, forcePersistBuffer } = options
+
+ // Force the buffer to be persisted if specified.
+ if (forcePersistBuffer) {
+ try {
+ const status = await redisBackend.expireProject(projectId) // clear the project from Redis if it is persisted, returns 'not-persisted' if it was not persisted
+ if (status === 'not-persisted') {
+ await persistBuffer(projectId, limits)
+ await redisBackend.expireProject(projectId) // clear the project from Redis after persisting
+ metrics.inc('persist_buffer_force', 1, { status: 'persisted' })
+ }
+ } catch (err) {
+ metrics.inc('persist_buffer_force', 1, { status: 'error' })
+ logger.error(
+ { err, projectId },
+ 'failed to persist buffer before committing changes'
+ )
+ }
+ }
+
+ metrics.inc('commit_changes', 1, {
+ history_buffer_level: historyBufferLevel || 0,
+ })
+
+ // Now handle the changes based on the configured history buffer level.
+ switch (historyBufferLevel) {
+ case 4: // Queue changes and only persist them in the background
+ await queueChanges(projectId, changes, endVersion)
+ return {}
+ case 3: // Queue changes and immediately persist with persistBuffer
+ await queueChanges(projectId, changes, endVersion)
+ return await persistBuffer(projectId, limits)
+ case 2: // Equivalent to queueChangesInRedis:true
+ await queueChangesFake(projectId, changes, endVersion)
+ return await persistChanges(projectId, changes, limits, endVersion)
+ case 1: // Queue changes with fake persist only for projects in redis already
+ await queueChangesFakeOnlyIfExists(projectId, changes, endVersion)
+ return await persistChanges(projectId, changes, limits, endVersion)
+ case 0: // Persist changes directly to the chunk store
+ return await persistChanges(projectId, changes, limits, endVersion)
+ default:
+ throw new Error(`Invalid history buffer level: ${historyBufferLevel}`)
+ }
+}
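
For illustration, one way the new entry point might be called. The limits values are assumptions loosely based on the fields read in persist_changes.js and persist_buffer.js, not documented defaults.

    const commitChanges = require('./commit_changes')

    async function handleIncomingChanges(projectId, changes, endVersion) {
      const limits = {
        maxChunkChanges: 1000,                // assumed value
        maxChunkChangeBytes: 5 * 1024 * 1024, // assumed value
        autoResync: true,
      }
      // Level 4: queue in Redis only; a background worker persists the buffer later.
      return await commitChanges(projectId, changes, limits, endVersion, {
        historyBufferLevel: 4,
      })
    }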
+
+/**
+ * Queues a set of changes in redis as if they had been persisted, ignoring any errors.
+ * @param {string} projectId
+ * @param {Change[]} changes
+ * @param {number} endVersion
+ * @param {Object} [options]
+ * @param {boolean} [options.onlyIfExists] - If true, only queue changes if the project
+ * already exists in Redis.
+ */
+
+async function queueChangesFake(projectId, changes, endVersion, options = {}) {
+ try {
+ await queueChanges(projectId, changes, endVersion, options)
+ await fakePersistRedisChanges(projectId, changes, endVersion)
+ } catch (err) {
+ logger.error({ err }, 'Chunk buffer verification failed')
+ }
+}
+
+/**
+ * Queues changes in Redis, simulating persistence, but only if the project already exists.
+ * @param {string} projectId - The ID of the project.
+ * @param {Change[]} changes - An array of changes to be queued.
+ * @param {number} endVersion - The expected version of the project before these changes are applied.
+ */
+
+async function queueChangesFakeOnlyIfExists(projectId, changes, endVersion) {
+ await queueChangesFake(projectId, changes, endVersion, {
+ onlyIfExists: true,
+ })
+}
+
+/**
+ * Simulates the persistence of changes by verifying a given set of changes against
+ * what is currently stored as non-persisted in Redis, and then updates the
+ * persisted version number in Redis.
+ *
+ * @async
+ * @param {string} projectId - The ID of the project.
+ * @param {Change[]} changesToPersist - An array of changes that are expected to be
+ * persisted. These are used for verification
+ * against the changes currently in Redis.
+ * @param {number} baseVersion - The base version number from which to calculate
+ * the new persisted version.
+ * @returns {Promise} A promise that resolves when the persisted version
+ * in Redis has been updated.
+ */
+async function fakePersistRedisChanges(
+ projectId,
+ changesToPersist,
+ baseVersion
+) {
+ const nonPersistedChanges = await redisBackend.getNonPersistedChanges(
+ projectId,
+ baseVersion
+ )
+
+ if (
+ serializeChanges(nonPersistedChanges) === serializeChanges(changesToPersist)
+ ) {
+ metrics.inc('persist_redis_changes_verification', 1, { status: 'match' })
+ } else {
+ logger.warn({ projectId }, 'mismatch of non-persisted changes from Redis')
+ metrics.inc('persist_redis_changes_verification', 1, {
+ status: 'mismatch',
+ })
+ }
+
+ const persistedVersion = baseVersion + nonPersistedChanges.length
+ await redisBackend.setPersistedVersion(projectId, persistedVersion)
+}
+
+/**
+ * @param {Change[]} changes
+ */
+function serializeChanges(changes) {
+ return JSON.stringify(changes.map(change => change.toRaw()))
+}
+
+module.exports = commitChanges
diff --git a/services/history-v1/storage/lib/content_hash.js b/services/history-v1/storage/lib/content_hash.js
new file mode 100644
index 0000000000..a381babc04
--- /dev/null
+++ b/services/history-v1/storage/lib/content_hash.js
@@ -0,0 +1,18 @@
+// @ts-check
+
+const { createHash } = require('node:crypto')
+
+/**
+ * Compute a SHA-1 hash of the content
+ *
+ * This is used to validate incoming updates.
+ *
+ * @param {string} content
+ */
+function getContentHash(content) {
+ const hash = createHash('sha-1')
+ hash.update(content)
+ return hash.digest('hex')
+}
+
+module.exports = { getContentHash }
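
A quick check of the helper; the expected output is the well-known SHA-1 digest of the string 'hello'.

    const { getContentHash } = require('./content_hash')

    console.log(getContentHash('hello'))
    // => aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d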
diff --git a/services/history-v1/storage/lib/errors.js b/services/history-v1/storage/lib/errors.js
new file mode 100644
index 0000000000..626536b079
--- /dev/null
+++ b/services/history-v1/storage/lib/errors.js
@@ -0,0 +1,5 @@
+const OError = require('@overleaf/o-error')
+
+class InvalidChangeError extends OError {}
+
+module.exports = { InvalidChangeError }
diff --git a/services/history-v1/storage/lib/history_store.js b/services/history-v1/storage/lib/history_store.js
index 58c46d92c0..e51bdc25c5 100644
--- a/services/history-v1/storage/lib/history_store.js
+++ b/services/history-v1/storage/lib/history_store.js
@@ -21,11 +21,12 @@ const streams = require('./streams')
const Chunk = core.Chunk
const gzip = promisify(zlib.gzip)
+const gunzip = promisify(zlib.gunzip)
class LoadError extends OError {
/**
- * @param {number|string} projectId
- * @param {number|string} chunkId
+ * @param {string} projectId
+ * @param {string} chunkId
* @param {any} cause
*/
constructor(projectId, chunkId, cause) {
@@ -41,8 +42,8 @@ class LoadError extends OError {
class StoreError extends OError {
/**
- * @param {number|string} projectId
- * @param {number|string} chunkId
+ * @param {string} projectId
+ * @param {string} chunkId
* @param {any} cause
*/
constructor(projectId, chunkId, cause) {
@@ -57,8 +58,8 @@ class StoreError extends OError {
}
/**
- * @param {number|string} projectId
- * @param {number|string} chunkId
+ * @param {string} projectId
+ * @param {string} chunkId
* @return {string}
*/
function getKey(projectId, chunkId) {
@@ -88,8 +89,8 @@ class HistoryStore {
/**
* Load the raw object for a History.
*
- * @param {number|string} projectId
- * @param {number|string} chunkId
+ * @param {string} projectId
+ * @param {string} chunkId
* @return {Promise}
*/
async loadRaw(projectId, chunkId) {
@@ -114,11 +115,37 @@ class HistoryStore {
}
}
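+
+ /**
+ * Load the raw object for a History together with the gzipped buffer it was
+ * stored as.
+ *
+ * @param {string} projectId
+ * @param {string} chunkId
+ * @return {Promise<{buffer: Buffer, raw: import('overleaf-editor-core/lib/types').RawHistory}>}
+ */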
+ async loadRawWithBuffer(projectId, chunkId) {
+ assert.projectId(projectId, 'bad projectId')
+ assert.chunkId(chunkId, 'bad chunkId')
+
+ const key = getKey(projectId, chunkId)
+
+ logger.debug({ projectId, chunkId }, 'loadBuffer started')
+ try {
+ const buf = await streams.readStreamToBuffer(
+ await this.#persistor.getObjectStream(this.#bucket, key)
+ )
+ const unzipped = await gunzip(buf)
+ return {
+ buffer: buf,
+ raw: JSON.parse(unzipped.toString('utf-8')),
+ }
+ } catch (err) {
+ if (err instanceof objectPersistor.Errors.NotFoundError) {
+ throw new Chunk.NotPersistedError(projectId)
+ }
+ throw new LoadError(projectId, chunkId, err)
+ } finally {
+ logger.debug({ projectId, chunkId }, 'loadBuffer finished')
+ }
+ }
+
/**
* Compress and store a {@link History}.
*
- * @param {number|string} projectId
- * @param {number|string} chunkId
+ * @param {string} projectId
+ * @param {string} chunkId
* @param {import('overleaf-editor-core/lib/types').RawHistory} rawHistory
*/
async storeRaw(projectId, chunkId, rawHistory) {
diff --git a/services/history-v1/storage/lib/knex.js b/services/history-v1/storage/lib/knex.js
index 5cdc85e2ab..7000fe034c 100644
--- a/services/history-v1/storage/lib/knex.js
+++ b/services/history-v1/storage/lib/knex.js
@@ -1,6 +1,8 @@
+// @ts-check
+
'use strict'
const env = process.env.NODE_ENV || 'development'
const knexfile = require('../../knexfile')
-module.exports = require('knex')(knexfile[env])
+module.exports = require('knex').default(knexfile[env])
diff --git a/services/history-v1/storage/lib/knex_read_only.js b/services/history-v1/storage/lib/knex_read_only.js
new file mode 100644
index 0000000000..a78c4689a4
--- /dev/null
+++ b/services/history-v1/storage/lib/knex_read_only.js
@@ -0,0 +1,19 @@
+'use strict'
+
+const config = require('config')
+const knexfile = require('../../knexfile')
+
+const env = process.env.NODE_ENV || 'development'
+
+if (config.databaseUrlReadOnly) {
+ module.exports = require('knex')({
+ ...knexfile[env],
+ pool: {
+ ...knexfile[env].pool,
+ min: 0,
+ },
+ connection: config.databaseUrlReadOnly,
+ })
+} else {
+ module.exports = require('./knex')
+}
diff --git a/services/history-v1/storage/lib/persist_buffer.js b/services/history-v1/storage/lib/persist_buffer.js
new file mode 100644
index 0000000000..68b71e148f
--- /dev/null
+++ b/services/history-v1/storage/lib/persist_buffer.js
@@ -0,0 +1,237 @@
+// @ts-check
+'use strict'
+
+const logger = require('@overleaf/logger')
+const metrics = require('@overleaf/metrics')
+const OError = require('@overleaf/o-error')
+const assert = require('./assert')
+const chunkStore = require('./chunk_store')
+const { BlobStore } = require('./blob_store')
+const BatchBlobStore = require('./batch_blob_store')
+const persistChanges = require('./persist_changes')
+const resyncProject = require('./resync_project')
+const redisBackend = require('./chunk_store/redis')
+
+const PERSIST_BATCH_SIZE = 50
+
+/**
+ * Persist the changes from Redis buffer to the main storage
+ *
+ * Algorithm Outline:
+ * 1. Get the latest chunk's endVersion from the database
+ * 2. Get non-persisted changes from Redis that are after this endVersion.
+ * 3. If no such changes, exit.
+ * 4. Load file blobs for these Redis changes.
+ * 5. Run the persistChanges() algorithm to store these changes into one or more new chunks in GCS.
+ * - This must not decrease the endVersion. If changes were processed, it must advance.
+ * 6. Set the new persisted version (endVersion of the latest persisted chunk) in Redis.
+ *
+ * @param {string} projectId
+ * @param {Object} limits
+ * @throws {Error | OError} If a critical error occurs during persistence.
+ */
+async function persistBuffer(projectId, limits) {
+ assert.projectId(projectId)
+ logger.debug({ projectId }, 'starting persistBuffer operation')
+
+ // 1. Get the latest chunk's endVersion from GCS/main store
+ let endVersion
+ const latestChunkMetadata = await chunkStore.getLatestChunkMetadata(projectId)
+
+ if (latestChunkMetadata) {
+ endVersion = latestChunkMetadata.endVersion
+ } else {
+ endVersion = 0 // No chunks found, start from version 0
+ logger.debug({ projectId }, 'no existing chunks found in main storage')
+ }
+ const originalEndVersion = endVersion
+
+ logger.debug({ projectId, endVersion }, 'got latest persisted chunk')
+
+ // Process changes in batches
+ let numberOfChangesPersisted = 0
+ let currentChunk = null
+ let resyncNeeded = false
+ let resyncChangesWerePersisted = false
+ while (true) {
+ // 2. Get non-persisted changes from Redis
+ const changesToPersist = await redisBackend.getNonPersistedChanges(
+ projectId,
+ endVersion,
+ { maxChanges: PERSIST_BATCH_SIZE }
+ )
+
+ if (changesToPersist.length === 0) {
+ break
+ }
+
+ logger.debug(
+ {
+ projectId,
+ endVersion,
+ count: changesToPersist.length,
+ },
+ 'found changes in Redis to persist'
+ )
+
+ // 4. Load file blobs for these Redis changes. Errors will propagate.
+ const blobStore = new BlobStore(projectId)
+ const batchBlobStore = new BatchBlobStore(blobStore)
+
+ const blobHashes = new Set()
+ for (const change of changesToPersist) {
+ change.findBlobHashes(blobHashes)
+ }
+ if (blobHashes.size > 0) {
+ await batchBlobStore.preload(Array.from(blobHashes))
+ }
+ for (const change of changesToPersist) {
+ await change.loadFiles('lazy', blobStore)
+ }
+
+ // 5. Run the persistChanges() algorithm. Errors will propagate.
+ logger.debug(
+ {
+ projectId,
+ endVersion,
+ changeCount: changesToPersist.length,
+ },
+ 'calling persistChanges'
+ )
+
+ const persistResult = await persistChanges(
+ projectId,
+ changesToPersist,
+ limits,
+ endVersion
+ )
+
+ if (!persistResult || !persistResult.currentChunk) {
+ metrics.inc('persist_buffer', 1, { status: 'no-chunk-error' })
+ throw new OError(
+ 'persistChanges did not produce a new chunk for non-empty changes',
+ {
+ projectId,
+ endVersion,
+ changeCount: changesToPersist.length,
+ }
+ )
+ }
+
+ currentChunk = persistResult.currentChunk
+ const newEndVersion = currentChunk.getEndVersion()
+
+ if (newEndVersion <= endVersion) {
+ metrics.inc('persist_buffer', 1, { status: 'chunk-version-mismatch' })
+ throw new OError(
+ 'persisted chunk endVersion must be greater than current persisted chunk end version for non-empty changes',
+ {
+ projectId,
+ newEndVersion,
+ endVersion,
+ changeCount: changesToPersist.length,
+ }
+ )
+ }
+
+ logger.debug(
+ {
+ projectId,
+ oldVersion: endVersion,
+ newVersion: newEndVersion,
+ },
+ 'successfully persisted changes from Redis to main storage'
+ )
+
+ // 6. Set the persisted version in Redis. Errors will propagate.
+ const status = await redisBackend.setPersistedVersion(
+ projectId,
+ newEndVersion
+ )
+
+ if (status !== 'ok') {
+ metrics.inc('persist_buffer', 1, { status: 'error-on-persisted-version' })
+ throw new OError('failed to update persisted version in Redis', {
+ projectId,
+ newEndVersion,
+ status,
+ })
+ }
+
+ logger.debug(
+ { projectId, newEndVersion },
+ 'updated persisted version in Redis'
+ )
+ numberOfChangesPersisted += persistResult.numberOfChangesPersisted
+ endVersion = newEndVersion
+
+ // Check if a resync might be needed
+ if (persistResult.resyncNeeded) {
+ resyncNeeded = true
+ }
+
+ if (
+ changesToPersist.some(
+ change => change.getOrigin()?.getKind() === 'history-resync'
+ )
+ ) {
+ resyncChangesWerePersisted = true
+ }
+
+ if (persistResult.numberOfChangesPersisted < PERSIST_BATCH_SIZE) {
+ // We reached the end of available changes
+ break
+ }
+ }
+
+ if (numberOfChangesPersisted === 0) {
+ logger.debug(
+ { projectId, endVersion },
+ 'no new changes in Redis buffer to persist'
+ )
+ metrics.inc('persist_buffer', 1, { status: 'no_changes' })
+ // No changes to persist, update the persisted version in Redis
+ // to match the current endVersion. This shouldn't be needed
+ // unless a worker failed to update the persisted version.
+ await redisBackend.setPersistedVersion(projectId, endVersion)
+ } else {
+ logger.debug(
+ { projectId, finalPersistedVersion: endVersion },
+ 'persistBuffer operation completed successfully'
+ )
+ metrics.inc('persist_buffer', 1, { status: 'persisted' })
+ }
+
+ if (limits.autoResync && resyncNeeded) {
+ if (resyncChangesWerePersisted) {
+ // To avoid an infinite loop, do not resync if the current batch of
+ // changes contains a history resync.
+ logger.warn(
+ { projectId },
+ 'content hash validation failed while persisting a history resync, skipping additional resync'
+ )
+ } else {
+ const backend = chunkStore.getBackend(projectId)
+ const mongoProjectId =
+ await backend.resolveHistoryIdToMongoProjectId(projectId)
+ await resyncProject(mongoProjectId)
+ }
+ }
+
+ if (currentChunk == null) {
+ const { chunk } = await chunkStore.loadByChunkRecord(
+ projectId,
+ latestChunkMetadata
+ )
+ currentChunk = chunk
+ }
+
+ return {
+ numberOfChangesPersisted,
+ originalEndVersion,
+ currentChunk,
+ resyncNeeded,
+ }
+}
+
+module.exports = persistBuffer
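
A sketch of how a background worker might drive persistBuffer. The limits fields are assumptions based on the options read elsewhere in this directory; the worker wiring is illustrative.

    const persistBuffer = require('./persist_buffer')

    async function persistOneProject(projectId) {
      const limits = {
        maxChunkChanges: 1000,                // assumed value
        maxChunkChangeBytes: 5 * 1024 * 1024, // assumed value
        autoResync: true,                     // trigger a resync on hash mismatches
      }
      const result = await persistBuffer(projectId, limits)
      return {
        persisted: result.numberOfChangesPersisted,
        endVersion: result.currentChunk.getEndVersion(),
        resyncNeeded: result.resyncNeeded,
      }
    }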
diff --git a/services/history-v1/storage/lib/persist_changes.js b/services/history-v1/storage/lib/persist_changes.js
index b661a4818c..d2ca00053f 100644
--- a/services/history-v1/storage/lib/persist_changes.js
+++ b/services/history-v1/storage/lib/persist_changes.js
@@ -1,8 +1,9 @@
-/** @module */
+// @ts-check
+
'use strict'
const _ = require('lodash')
-const BPromise = require('bluebird')
+const logger = require('@overleaf/logger')
const core = require('overleaf-editor-core')
const Chunk = core.Chunk
@@ -10,6 +11,9 @@ const History = core.History
const assert = require('./assert')
const chunkStore = require('./chunk_store')
+const { BlobStore } = require('./blob_store')
+const { InvalidChangeError } = require('./errors')
+const { getContentHash } = require('./content_hash')
function countChangeBytes(change) {
// Note: This is not quite accurate, because the raw change may contain raw
@@ -48,28 +52,35 @@ Timer.prototype.elapsed = function () {
* endVersion may be better suited to the metadata record.
*
* @param {string} projectId
- * @param {Array.<Change>} allChanges
+ * @param {core.Change[]} allChanges
* @param {Object} limits
* @param {number} clientEndVersion
 * @return {Promise.<Object?>}
*/
-module.exports = function persistChanges(
- projectId,
- allChanges,
- limits,
- clientEndVersion
-) {
+async function persistChanges(projectId, allChanges, limits, clientEndVersion) {
assert.projectId(projectId)
assert.array(allChanges)
assert.maybe.object(limits)
assert.integer(clientEndVersion)
+ const blobStore = new BlobStore(projectId)
+
+ const earliestChangeTimestamp =
+ allChanges.length > 0 ? allChanges[0].getTimestamp() : null
+
let currentChunk
- // currentSnapshot tracks the latest change that we're applying; we use it to
- // check that the changes we are persisting are valid.
+
+ /**
+ * currentSnapshot tracks the latest change that we're applying; we use it to
+ * check that the changes we are persisting are valid.
+ *
+ * @type {core.Snapshot}
+ */
let currentSnapshot
+
let originalEndVersion
let changesToPersist
+ let resyncNeeded = false
limits = limits || {}
_.defaults(limits, {
@@ -88,57 +99,135 @@ module.exports = function persistChanges(
}
}
- function fillChunk(chunk, changes) {
+ /**
+ * Add changes to a chunk until the chunk is full
+ *
+ * The chunk is full if it reaches a certain number of changes or a certain
+ * size in bytes
+ *
+ * @param {core.Chunk} chunk
+ * @param {core.Change[]} changes
+ */
+ async function fillChunk(chunk, changes) {
let totalBytes = totalChangeBytes(chunk.getChanges())
let changesPushed = false
while (changes.length > 0) {
- if (chunk.getChanges().length >= limits.maxChunkChanges) break
- const changeBytes = countChangeBytes(changes[0])
- if (totalBytes + changeBytes > limits.maxChunkChangeBytes) break
- const changesToFill = changes.splice(0, 1)
- currentSnapshot.applyAll(changesToFill, { strict: true })
- chunk.pushChanges(changesToFill)
+ if (chunk.getChanges().length >= limits.maxChunkChanges) {
+ break
+ }
+
+ const change = changes[0]
+ const changeBytes = countChangeBytes(change)
+
+ if (totalBytes + changeBytes > limits.maxChunkChangeBytes) {
+ break
+ }
+
+ for (const operation of change.iterativelyApplyTo(currentSnapshot, {
+ strict: true,
+ })) {
+ await validateContentHash(operation)
+ }
+
+ chunk.pushChanges([change])
+ changes.shift()
totalBytes += changeBytes
changesPushed = true
}
return changesPushed
}
- function extendLastChunkIfPossible() {
- return chunkStore.loadLatest(projectId).then(function (latestChunk) {
- currentChunk = latestChunk
- originalEndVersion = latestChunk.getEndVersion()
- if (originalEndVersion !== clientEndVersion) {
- throw new Chunk.ConflictingEndVersion(
- clientEndVersion,
- originalEndVersion
- )
+ /**
+ * Check that the operation is valid and can be incorporated into the history.
+ *
+ * For now, this checks content hashes when they are provided.
+ *
+ * @param {core.Operation} operation
+ */
+ async function validateContentHash(operation) {
+ if (operation instanceof core.EditFileOperation) {
+ const editOperation = operation.getOperation()
+ if (
+ editOperation instanceof core.TextOperation &&
+ editOperation.contentHash != null
+ ) {
+ const path = operation.getPathname()
+ const file = currentSnapshot.getFile(path)
+ if (file == null) {
+ throw new InvalidChangeError('file not found for hash validation', {
+ projectId,
+ path,
+ })
+ }
+ await file.load('eager', blobStore)
+ const content = file.getContent({ filterTrackedDeletes: true })
+ const expectedHash = editOperation.contentHash
+ const actualHash = content != null ? getContentHash(content) : null
+ logger.debug({ expectedHash, actualHash }, 'validating content hash')
+ if (actualHash !== expectedHash) {
+ // only log a warning on the first mismatch in each persistChanges call
+ if (!resyncNeeded) {
+ logger.warn(
+ { projectId, path, expectedHash, actualHash },
+ 'content hash mismatch'
+ )
+ }
+ resyncNeeded = true
+ }
+
+ // Remove the content hash from the change before storing it in the chunk.
+ // It was only useful for validation.
+ editOperation.contentHash = null
}
-
- currentSnapshot = latestChunk.getSnapshot().clone()
- const timer = new Timer()
- currentSnapshot.applyAll(latestChunk.getChanges())
-
- if (!fillChunk(currentChunk, changesToPersist)) return
- checkElapsedTime(timer)
-
- return chunkStore.update(projectId, originalEndVersion, currentChunk)
- })
+ }
}
- function createNewChunksAsNeeded() {
- if (changesToPersist.length === 0) return
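+ /**
+ * Load the latest persisted chunk, check that its end version matches the
+ * version the client sent changes against, and rebuild the current
+ * snapshot from it.
+ */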
+ async function loadLatestChunk() {
+ const latestChunk = await chunkStore.loadLatest(projectId, {
+ persistedOnly: true,
+ })
- const endVersion = currentChunk.getEndVersion()
- const history = new History(currentSnapshot.clone(), [])
- const chunk = new Chunk(history, endVersion)
- const timer = new Timer()
- if (fillChunk(chunk, changesToPersist)) {
- checkElapsedTime(timer)
- currentChunk = chunk
- return chunkStore.create(projectId, chunk).then(createNewChunksAsNeeded)
+ currentChunk = latestChunk
+ originalEndVersion = latestChunk.getEndVersion()
+ if (originalEndVersion !== clientEndVersion) {
+ throw new Chunk.ConflictingEndVersion(
+ clientEndVersion,
+ originalEndVersion
+ )
+ }
+
+ currentSnapshot = latestChunk.getSnapshot().clone()
+ currentSnapshot.applyAll(currentChunk.getChanges())
+ }
+
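+ /**
+ * Add as many of the pending changes as will fit to the latest chunk and
+ * persist the updated chunk.
+ */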
+ async function extendLastChunkIfPossible() {
+ const timer = new Timer()
+ const changesPushed = await fillChunk(currentChunk, changesToPersist)
+ if (!changesPushed) {
+ return
+ }
+
+ checkElapsedTime(timer)
+
+ await chunkStore.update(projectId, currentChunk, earliestChangeTimestamp)
+ }
+
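+ /**
+ * Create and persist new chunks until all remaining changes have been
+ * stored.
+ */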
+ async function createNewChunksAsNeeded() {
+ while (changesToPersist.length > 0) {
+ const endVersion = currentChunk.getEndVersion()
+ const history = new History(currentSnapshot.clone(), [])
+ const chunk = new Chunk(history, endVersion)
+ const timer = new Timer()
+
+ const changesPushed = await fillChunk(chunk, changesToPersist)
+ if (changesPushed) {
+ checkElapsedTime(timer)
+ currentChunk = chunk
+ await chunkStore.create(projectId, chunk, earliestChangeTimestamp)
+ } else {
+ throw new Error('failed to fill empty chunk')
+ }
}
- throw new Error('failed to fill empty chunk')
}
function isOlderThanMinChangeTimestamp(change) {
@@ -157,15 +246,20 @@ module.exports = function persistChanges(
if (anyTooOld || tooManyChanges || tooManyBytes) {
changesToPersist = oldChanges
const numberOfChangesToPersist = oldChanges.length
- return extendLastChunkIfPossible()
- .then(createNewChunksAsNeeded)
- .then(function () {
- return {
- numberOfChangesPersisted: numberOfChangesToPersist,
- originalEndVersion,
- currentChunk,
- }
- })
+
+ await loadLatestChunk()
+ await extendLastChunkIfPossible()
+ await createNewChunksAsNeeded()
+
+ return {
+ numberOfChangesPersisted: numberOfChangesToPersist,
+ originalEndVersion,
+ currentChunk,
+ resyncNeeded,
+ }
+ } else {
+ return null
}
- return BPromise.resolve(null)
}
+
+module.exports = persistChanges
diff --git a/services/history-v1/storage/lib/queue_changes.js b/services/history-v1/storage/lib/queue_changes.js
new file mode 100644
index 0000000000..6b8d4b22b4
--- /dev/null
+++ b/services/history-v1/storage/lib/queue_changes.js
@@ -0,0 +1,75 @@
+// @ts-check
+
+'use strict'
+
+const redisBackend = require('./chunk_store/redis')
+const { BlobStore } = require('./blob_store')
+const chunkStore = require('./chunk_store')
+const core = require('overleaf-editor-core')
+const Chunk = core.Chunk
+
+/**
+ * Queues an incoming set of changes after validating them against the current snapshot.
+ *
+ * @async
+ * @function queueChanges
+ * @param {string} projectId - The project to queue changes for.
+ * @param {Array} changesToQueue - An array of change objects to be applied and queued.
+ * @param {number} endVersion - The expected version of the project before these changes are applied.
+ * This is used for optimistic concurrency control.
+ * @param {Object} [opts] - Additional options for queuing changes.
+ * @throws {Chunk.ConflictingEndVersion} If the provided `endVersion` does not match the
+ * current version of the project.
+ * @returns {Promise} A promise that resolves with the status returned by the
+ * `redisBackend.queueChanges` operation.
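+ *
+ * @example
+ * // A minimal sketch; `incomingChanges` is assumed to be an array of Change
+ * // objects and `endVersion` the version the client last saw.
+ * const status = await queueChanges(projectId, incomingChanges, endVersion)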
+ */
+async function queueChanges(projectId, changesToQueue, endVersion, opts) {
+ const result = await redisBackend.getHeadSnapshot(projectId)
+ let currentSnapshot = null
+ let currentVersion = null
+ if (result) {
+ // If we have a snapshot in redis, we can use it to check the current state
+ // of the project and apply changes to it.
+ currentSnapshot = result.snapshot
+ currentVersion = result.version
+ } else {
+ // Otherwise, load the latest chunk from the chunk store.
+ const latestChunk = await chunkStore.loadLatest(projectId, {
+ persistedOnly: true,
+ })
+ // Throw an error if no latest chunk is found, indicating the project has not been initialised.
+ if (!latestChunk) {
+ throw new Chunk.NotFoundError(projectId)
+ }
+ currentSnapshot = latestChunk.getSnapshot()
+ currentSnapshot.applyAll(latestChunk.getChanges())
+ currentVersion = latestChunk.getEndVersion()
+ }
+
+ // Ensure the endVersion matches the current version of the project.
+ if (endVersion !== currentVersion) {
+ throw new Chunk.ConflictingEndVersion(endVersion, currentVersion)
+ }
+
+ // Compute the new hollow snapshot to be saved to redis.
+ const hollowSnapshot = currentSnapshot
+ const blobStore = new BlobStore(projectId)
+ await hollowSnapshot.loadFiles('hollow', blobStore)
+ // Clone the changes to avoid modifying the original ones when computing the hollow snapshot.
+ const hollowChanges = changesToQueue.map(change => change.clone())
+ for (const change of hollowChanges) {
+ await change.loadFiles('hollow', blobStore)
+ }
+ hollowSnapshot.applyAll(hollowChanges, { strict: true })
+ const baseVersion = currentVersion
+ const status = await redisBackend.queueChanges(
+ projectId,
+ hollowSnapshot,
+ baseVersion,
+ changesToQueue,
+ opts
+ )
+ return status
+}
+
+module.exports = queueChanges
diff --git a/services/history-v1/storage/lib/redis.js b/services/history-v1/storage/lib/redis.js
new file mode 100644
index 0000000000..9b00cc0a26
--- /dev/null
+++ b/services/history-v1/storage/lib/redis.js
@@ -0,0 +1,19 @@
+const config = require('config')
+const redis = require('@overleaf/redis-wrapper')
+
+const historyRedisOptions = config.get('redis.history')
+const rclientHistory = redis.createClient(historyRedisOptions)
+
+const lockRedisOptions = config.get('redis.history')
+const rclientLock = redis.createClient(lockRedisOptions)
+
+async function disconnect() {
+ await Promise.all([rclientHistory.disconnect(), rclientLock.disconnect()])
+}
+
+module.exports = {
+ rclientHistory,
+ rclientLock,
+ redis,
+ disconnect,
+}
diff --git a/services/history-v1/storage/lib/resync_project.js b/services/history-v1/storage/lib/resync_project.js
new file mode 100644
index 0000000000..3ec680bb5b
--- /dev/null
+++ b/services/history-v1/storage/lib/resync_project.js
@@ -0,0 +1,14 @@
+// @ts-check
+
+const config = require('config')
+const { fetchNothing } = require('@overleaf/fetch-utils')
+
+const PROJECT_HISTORY_URL = `http://${config.projectHistory.host}:${config.projectHistory.port}`
+
+async function resyncProject(projectId) {
+ await fetchNothing(`${PROJECT_HISTORY_URL}/project/${projectId}/resync`, {
+ method: 'POST',
+ })
+}
+
+module.exports = resyncProject
diff --git a/services/history-v1/storage/lib/scan.js b/services/history-v1/storage/lib/scan.js
new file mode 100644
index 0000000000..d55f5362c1
--- /dev/null
+++ b/services/history-v1/storage/lib/scan.js
@@ -0,0 +1,202 @@
+// @ts-check
+
+'use strict'
+
+const logger = require('@overleaf/logger')
+const { JobNotFoundError, JobNotReadyError } = require('./chunk_store/errors')
+const BATCH_SIZE = 1000 // Default batch size for SCAN
+
+/**
+ * Asynchronously scans a Redis instance or cluster for keys matching a pattern.
+ *
+ * This function handles both standalone Redis instances and Redis clusters.
+ * For clusters, it iterates over all master nodes. It yields keys in batches
+ * as they are found by the SCAN command.
+ *
+ * @param {object} redisClient - The Redis client instance (from @overleaf/redis-wrapper).
+ * @param {string} pattern - The pattern to match keys against (e.g., 'user:*').
+ * @param {number} [count=BATCH_SIZE] - Optional hint for Redis SCAN count per iteration.
+ * @yields {string[]} A batch of matching keys.
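+ *
+ * @example
+ * // A minimal sketch; the key pattern shown here is illustrative only.
+ * for await (const keys of scanRedisCluster(rclient, 'persist-time:{*}')) {
+ *   console.log(`scanned a batch of ${keys.length} keys`)
+ * }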
+ */
+async function* scanRedisCluster(redisClient, pattern, count = BATCH_SIZE) {
+ const nodes = redisClient.nodes ? redisClient.nodes('master') : [redisClient]
+
+ for (const node of nodes) {
+ let cursor = '0'
+ do {
+ // redisClient from @overleaf/redis-wrapper uses ioredis style commands
+ const [nextCursor, keys] = await node.scan(
+ cursor,
+ 'MATCH',
+ pattern,
+ 'COUNT',
+ count
+ )
+ cursor = nextCursor
+ if (keys.length > 0) {
+ yield keys
+ }
+ } while (cursor !== '0')
+ }
+}
+
+/**
+ * Extracts the content within the first pair of curly braces {} from a string.
+ * This is used to extract a user ID or project ID from a Redis key.
+ *
+ * @param {string} key - The input string containing content within curly braces.
+ * @returns {string | null} The extracted content (the key ID) if found, otherwise null.
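+ *
+ * @example
+ * // Illustrative values:
+ * extractKeyId('expire-time:{507f1f77bcf86cd799439011}') // => '507f1f77bcf86cd799439011'
+ * extractKeyId('no-braces-here') // => null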
+ */
+function extractKeyId(key) {
+ const match = key.match(/\{(.*?)\}/)
+ if (match && match[1]) {
+ return match[1]
+ }
+ return null
+}
+
+/**
+ * Fetches timestamps for a list of project IDs based on a given key name.
+ *
+ * @param {string[]} projectIds - Array of project identifiers.
+ * @param {object} rclient - The Redis client instance.
+ * @param {string} keyName - The base name for the Redis keys storing the timestamps (e.g., "expire-time", "persist-time").
+ * @param {number} currentTime - The current time (timestamp in milliseconds) to compare against.
+ * @returns {Promise<Array<{projectId: string, timestampValue: string}>>}
+ * A promise that resolves to an array of objects, each containing a projectId and
+ * its corresponding timestampValue, for due projects only.
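+ *
+ * @example
+ * // A minimal sketch; the project id and key name are illustrative only.
+ * const due = await fetchOverdueProjects(
+ *   ['507f1f77bcf86cd799439011'],
+ *   rclient,
+ *   'persist-time',
+ *   Date.now()
+ * )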
+ */
+async function fetchOverdueProjects(projectIds, rclient, keyName, currentTime) {
+ if (!projectIds || projectIds.length === 0) {
+ return []
+ }
+ const timestampKeys = projectIds.map(id => `${keyName}:{${id}}`)
+ const timestamps = await rclient.mget(timestampKeys)
+
+ const dueProjects = []
+ for (let i = 0; i < projectIds.length; i++) {
+ const projectId = projectIds[i]
+ const timestampValue = timestamps[i]
+
+ if (timestampValue !== null) {
+ const timestamp = parseInt(timestampValue, 10)
+ if (!isNaN(timestamp) && currentTime > timestamp) {
+ dueProjects.push({ projectId, timestampValue })
+ }
+ }
+ }
+ return dueProjects
+}
+
+/**
+ * Scans Redis for keys matching a pattern derived from keyName, identifies items that are "due" based on a timestamp,
+ * and performs a specified action on them.
+ *
+ * @param {object} rclient - The Redis client instance.
+ * @param {string} taskName - A descriptive name for the task (used in logging).
+ * @param {string} keyName - The base name for the Redis keys (e.g., "expire-time", "persist-time").
+ * The function will derive the key prefix as `${keyName}:` and scan pattern as `${keyName}:{*}`.
+ * @param {function(string): Promise} actionFn - An async function that takes a projectId and performs an action.
+ * @param {boolean} DRY_RUN - If true, logs actions that would be taken without performing them.
+ * @returns {Promise<{scannedKeyCount: number, processedKeyCount: number}>} Counts of scanned and processed keys.
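+ *
+ * @example
+ * // A minimal sketch, assuming an `expireProject(projectId)` action exists:
+ * const { processedKeyCount } = await scanAndProcessDueItems(
+ *   rclient,
+ *   'expireProjects',
+ *   'expire-time',
+ *   expireProject,
+ *   false // DRY_RUN
+ * )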
+ */
+async function scanAndProcessDueItems(
+ rclient,
+ taskName,
+ keyName,
+ actionFn,
+ DRY_RUN
+) {
+ let scannedKeyCount = 0
+ let processedKeyCount = 0
+ const START_TIME = Date.now()
+ const logContext = { taskName, dryRun: DRY_RUN }
+
+ const scanPattern = `${keyName}:{*}`
+
+ if (DRY_RUN) {
+ logger.info(logContext, `Starting ${taskName} scan in DRY RUN mode`)
+ } else {
+ logger.info(logContext, `Starting ${taskName} scan`)
+ }
+
+ for await (const keysBatch of scanRedisCluster(rclient, scanPattern)) {
+ scannedKeyCount += keysBatch.length
+ const projectIds = keysBatch.map(extractKeyId).filter(id => id != null)
+
+ if (projectIds.length === 0) {
+ continue
+ }
+
+ const currentTime = Date.now()
+ const overdueProjects = await fetchOverdueProjects(
+ projectIds,
+ rclient,
+ keyName,
+ currentTime
+ )
+
+ for (const project of overdueProjects) {
+ const { projectId } = project
+ if (DRY_RUN) {
+ logger.info(
+ { ...logContext, projectId },
+ `[Dry Run] Would perform ${taskName} for project`
+ )
+ } else {
+ try {
+ await actionFn(projectId)
+ logger.debug(
+ { ...logContext, projectId },
+ `Successfully performed ${taskName} for project`
+ )
+ } catch (err) {
+ if (err instanceof JobNotReadyError) {
+ // the project has been touched since the job was created
+ logger.info(
+ { ...logContext, projectId },
+ `Job not ready for ${taskName} for project`
+ )
+ } else if (err instanceof JobNotFoundError) {
+ // the project has been expired already by another worker
+ logger.info(
+ { ...logContext, projectId },
+ `Job not found for ${taskName} for project`
+ )
+ } else {
+ logger.error(
+ { ...logContext, projectId, err },
+ `Error performing ${taskName} for project`
+ )
+ }
+ continue
+ }
+ }
+ processedKeyCount++
+
+ if (processedKeyCount % 1000 === 0 && processedKeyCount > 0) {
+ logger.info(
+ { ...logContext, scannedKeyCount, processedKeyCount },
+ `${taskName} scan progress`
+ )
+ }
+ }
+ }
+
+ logger.info(
+ {
+ ...logContext,
+ scannedKeyCount,
+ processedKeyCount,
+ elapsedTimeInSeconds: Math.floor((Date.now() - START_TIME) / 1000),
+ },
+ `${taskName} scan complete`
+ )
+ return { scannedKeyCount, processedKeyCount }
+}
+
+module.exports = {
+ scanRedisCluster,
+ extractKeyId,
+ scanAndProcessDueItems,
+}
diff --git a/services/history-v1/storage/scripts/back_fill_file_hash.mjs b/services/history-v1/storage/scripts/back_fill_file_hash.mjs
index a0abfa52a7..2e12328e5c 100644
--- a/services/history-v1/storage/scripts/back_fill_file_hash.mjs
+++ b/services/history-v1/storage/scripts/back_fill_file_hash.mjs
@@ -1,28 +1,20 @@
// @ts-check
-import Crypto from 'node:crypto'
import Events from 'node:events'
import fs from 'node:fs'
import Path from 'node:path'
import { performance } from 'node:perf_hooks'
import Stream from 'node:stream'
-import zLib from 'node:zlib'
import { setTimeout } from 'node:timers/promises'
-import { Binary, ObjectId } from 'mongodb'
+import { ObjectId } from 'mongodb'
import pLimit from 'p-limit'
import logger from '@overleaf/logger'
import {
batchedUpdate,
objectIdFromInput,
renderObjectId,
- READ_PREFERENCE_SECONDARY,
} from '@overleaf/mongo-utils/batchedUpdate.js'
import OError from '@overleaf/o-error'
-import {
- AlreadyWrittenError,
- NoKEKMatchedError,
- NotFoundError,
-} from '@overleaf/object-persistor/src/Errors.js'
-import { backupPersistor, projectBlobsBucket } from '../lib/backupPersistor.mjs'
+import { NotFoundError } from '@overleaf/object-persistor/src/Errors.js'
import {
BlobStore,
GLOBAL_BLOBS,
@@ -30,11 +22,10 @@ import {
getProjectBlobsBatch,
getStringLengthOfFile,
makeBlobForFile,
- makeProjectKey,
} from '../lib/blob_store/index.js'
-import { backedUpBlobs as backedUpBlobsCollection, db } from '../lib/mongodb.js'
-import filestorePersistor from '../lib/persistor.js'
+import { db } from '../lib/mongodb.js'
import commandLineArgs from 'command-line-args'
+import readline from 'node:readline'
// Silence warning.
Events.setMaxListeners(20)
@@ -46,6 +37,8 @@ ObjectId.cacheHexString = true
* @typedef {import("overleaf-editor-core").Blob} Blob
* @typedef {import("perf_hooks").EventLoopUtilization} EventLoopUtilization
* @typedef {import("mongodb").Collection} Collection
+ * @typedef {import("mongodb").Collection} ProjectsCollection
+ * @typedef {import("mongodb").Collection<{project:Project}>} DeletedProjectsCollection
* @typedef {import("@overleaf/object-persistor/src/PerProjectEncryptedS3Persistor").CachedPerProjectEncryptedS3Persistor} CachedPerProjectEncryptedS3Persistor
*/
@@ -86,17 +79,16 @@ ObjectId.cacheHexString = true
*/
/**
- * @return {{PROCESS_HASHED_FILES: boolean, PROCESS_DELETED_FILES: boolean, LOGGING_IDENTIFIER: string, BATCH_RANGE_START: string, PROCESS_BLOBS: boolean, BATCH_RANGE_END: string, PROCESS_NON_DELETED_PROJECTS: boolean, PROCESS_DELETED_PROJECTS: boolean, COLLECT_BACKED_UP_BLOBS: boolean}}
+ * @return {{PROJECT_IDS_FROM: string, PROCESS_HASHED_FILES: boolean, LOGGING_IDENTIFIER: string, BATCH_RANGE_START: string, PROCESS_BLOBS: boolean, BATCH_RANGE_END: string, PROCESS_NON_DELETED_PROJECTS: boolean, PROCESS_DELETED_PROJECTS: boolean}}
*/
function parseArgs() {
const PUBLIC_LAUNCH_DATE = new Date('2012-01-01T00:00:00Z')
const args = commandLineArgs([
{ name: 'processNonDeletedProjects', type: String, defaultValue: 'false' },
{ name: 'processDeletedProjects', type: String, defaultValue: 'false' },
- { name: 'processDeletedFiles', type: String, defaultValue: 'false' },
{ name: 'processHashedFiles', type: String, defaultValue: 'false' },
{ name: 'processBlobs', type: String, defaultValue: 'true' },
- { name: 'collectBackedUpBlobs', type: String, defaultValue: 'true' },
+ { name: 'projectIdsFrom', type: String, defaultValue: '' },
{
name: 'BATCH_RANGE_START',
type: String,
@@ -127,12 +119,11 @@ function parseArgs() {
PROCESS_NON_DELETED_PROJECTS: boolVal('processNonDeletedProjects'),
PROCESS_DELETED_PROJECTS: boolVal('processDeletedProjects'),
PROCESS_BLOBS: boolVal('processBlobs'),
- PROCESS_DELETED_FILES: boolVal('processDeletedFiles'),
PROCESS_HASHED_FILES: boolVal('processHashedFiles'),
- COLLECT_BACKED_UP_BLOBS: boolVal('collectBackedUpBlobs'),
BATCH_RANGE_START,
BATCH_RANGE_END,
LOGGING_IDENTIFIER: args['LOGGING_IDENTIFIER'] || BATCH_RANGE_START,
+ PROJECT_IDS_FROM: args['projectIdsFrom'],
}
}
@@ -140,12 +131,11 @@ const {
PROCESS_NON_DELETED_PROJECTS,
PROCESS_DELETED_PROJECTS,
PROCESS_BLOBS,
- PROCESS_DELETED_FILES,
PROCESS_HASHED_FILES,
- COLLECT_BACKED_UP_BLOBS,
BATCH_RANGE_START,
BATCH_RANGE_END,
LOGGING_IDENTIFIER,
+ PROJECT_IDS_FROM,
} = parseArgs()
// We need to handle the start and end differently as ids of deleted projects are created at time of deletion.
@@ -160,10 +150,6 @@ const CONCURRENT_BATCHES = parseInt(process.env.CONCURRENT_BATCHES || '2', 10)
const RETRIES = parseInt(process.env.RETRIES || '10', 10)
const RETRY_DELAY_MS = parseInt(process.env.RETRY_DELAY_MS || '100', 10)
-const USER_FILES_BUCKET_NAME = process.env.USER_FILES_BUCKET_NAME || ''
-if (!USER_FILES_BUCKET_NAME) {
- throw new Error('env var USER_FILES_BUCKET_NAME is missing')
-}
const RETRY_FILESTORE_404 = process.env.RETRY_FILESTORE_404 === 'true'
const BUFFER_DIR = fs.mkdtempSync(
process.env.BUFFER_DIR_PREFIX || '/tmp/back_fill_file_hash-'
@@ -174,10 +160,45 @@ const STREAM_HIGH_WATER_MARK = parseInt(
10
)
const LOGGING_INTERVAL = parseInt(process.env.LOGGING_INTERVAL || '60000', 10)
+const SLEEP_BEFORE_EXIT = parseInt(process.env.SLEEP_BEFORE_EXIT || '1000', 10)
+
+// Filestore endpoint location
+const FILESTORE_HOST = process.env.FILESTORE_HOST || '127.0.0.1'
+const FILESTORE_PORT = process.env.FILESTORE_PORT || '3009'
+
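+/**
+ * Stream a file from the filestore service over HTTP.
+ *
+ * @param {string} projectId
+ * @param {string} fileId
+ * @return {Promise<ReadableStream>} the response body stream
+ * @throws {NotFoundError} when filestore responds with a 404
+ */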
+async function fetchFromFilestore(projectId, fileId) {
+ const url = `http://${FILESTORE_HOST}:${FILESTORE_PORT}/project/${projectId}/file/${fileId}`
+ const response = await fetch(url)
+ if (!response.ok) {
+ if (response.status === 404) {
+ throw new NotFoundError('file not found in filestore', {
+ status: response.status,
+ })
+ }
+ const body = await response.text()
+ throw new OError('fetchFromFilestore failed', {
+ projectId,
+ fileId,
+ status: response.status,
+ body,
+ })
+ }
+ if (!response.body) {
+ throw new OError('fetchFromFilestore response has no body', {
+ projectId,
+ fileId,
+ status: response.status,
+ })
+ }
+ return response.body
+}
const projectsCollection = db.collection('projects')
+/** @type {ProjectsCollection} */
+const typedProjectsCollection = db.collection('projects')
const deletedProjectsCollection = db.collection('deletedProjects')
-const deletedFilesCollection = db.collection('deletedFiles')
+/** @type {DeletedProjectsCollection} */
+const typedDeletedProjectsCollection = db.collection('deletedProjects')
const concurrencyLimit = pLimit(CONCURRENCY)
@@ -195,7 +216,6 @@ async function processConcurrently(array, fn) {
const STATS = {
projects: 0,
blobs: 0,
- backedUpBlobs: 0,
filesWithHash: 0,
filesWithoutHash: 0,
filesDuplicated: 0,
@@ -209,14 +229,8 @@ const STATS = {
projectHardDeleted: 0,
fileHardDeleted: 0,
mongoUpdates: 0,
- deduplicatedWriteToAWSLocalCount: 0,
- deduplicatedWriteToAWSLocalEgress: 0,
- deduplicatedWriteToAWSRemoteCount: 0,
- deduplicatedWriteToAWSRemoteEgress: 0,
readFromGCSCount: 0,
readFromGCSIngress: 0,
- writeToAWSCount: 0,
- writeToAWSEgress: 0,
writeToGCSCount: 0,
writeToGCSEgress: 0,
}
@@ -238,7 +252,7 @@ function toMiBPerSecond(v, ms) {
/**
* @param {any} stats
* @param {number} ms
- * @return {{writeToAWSThroughputMiBPerSecond: number, readFromGCSThroughputMiBPerSecond: number}}
+ * @return {{readFromGCSThroughputMiBPerSecond: number}}
*/
function bandwidthStats(stats, ms) {
return {
@@ -246,10 +260,6 @@ function bandwidthStats(stats, ms) {
stats.readFromGCSIngress,
ms
),
- writeToAWSThroughputMiBPerSecond: toMiBPerSecond(
- stats.writeToAWSEgress,
- ms
- ),
}
}
@@ -341,14 +351,10 @@ async function processFile(entry, filePath) {
} catch (err) {
if (gracefulShutdownInitiated) throw err
if (err instanceof NotFoundError) {
- const { bucketName } = OError.getFullInfo(err)
- if (bucketName === USER_FILES_BUCKET_NAME && !RETRY_FILESTORE_404) {
+ if (!RETRY_FILESTORE_404) {
throw err // disable retries for not found in filestore bucket case
}
}
- if (err instanceof NoKEKMatchedError) {
- throw err // disable retries when upload to S3 will fail again
- }
STATS.filesRetries++
const {
ctx: { projectId },
@@ -377,42 +383,17 @@ async function processFileOnce(entry, filePath) {
ctx: { projectId, historyId },
fileId,
} = entry
- const blobStore = new BlobStore(historyId)
- if (entry.blob) {
- const { blob } = entry
- const hash = blob.getHash()
- if (entry.ctx.hasBackedUpBlob(hash)) {
- STATS.deduplicatedWriteToAWSLocalCount++
- STATS.deduplicatedWriteToAWSLocalEgress += estimateBlobSize(blob)
- return hash
- }
- entry.ctx.recordPendingBlob(hash)
- STATS.readFromGCSCount++
- const src = await blobStore.getStream(hash)
- const dst = fs.createWriteStream(filePath, {
- highWaterMark: STREAM_HIGH_WATER_MARK,
- })
- try {
- await Stream.promises.pipeline(src, dst)
- } finally {
- STATS.readFromGCSIngress += dst.bytesWritten
- }
- await uploadBlobToAWS(entry, blob, filePath)
- return hash
- }
- if (entry.hash && entry.ctx.hasBackedUpBlob(entry.hash)) {
- STATS.deduplicatedWriteToAWSLocalCount++
- const blob = entry.ctx.getCachedHistoryBlob(entry.hash)
- // blob might not exist on re-run with --PROCESS_BLOBS=false
- if (blob) STATS.deduplicatedWriteToAWSLocalEgress += estimateBlobSize(blob)
+ if (entry.hash && entry.ctx.hasCompletedBlob(entry.hash)) {
+ // We can enter this case for two identical files in the same project,
+ // one with hash, the other without. When the one without hash gets
+ // processed first, we can skip downloading the other one we already
+ // know the hash of.
return entry.hash
}
-
+ const blobStore = new BlobStore(historyId)
STATS.readFromGCSCount++
- const src = await filestorePersistor.getObjectStream(
- USER_FILES_BUCKET_NAME,
- `${projectId}/${fileId}`
- )
+ // make a fetch request to filestore itself
+ const src = await fetchFromFilestore(projectId, fileId)
const dst = fs.createWriteStream(filePath, {
highWaterMark: STREAM_HIGH_WATER_MARK,
})
@@ -435,16 +416,14 @@ async function processFileOnce(entry, filePath) {
STATS.globalBlobsEgress += estimateBlobSize(blob)
return hash
}
- if (entry.ctx.hasBackedUpBlob(hash)) {
- STATS.deduplicatedWriteToAWSLocalCount++
- STATS.deduplicatedWriteToAWSLocalEgress += estimateBlobSize(blob)
+ if (entry.ctx.hasCompletedBlob(hash)) {
return hash
}
entry.ctx.recordPendingBlob(hash)
try {
await uploadBlobToGCS(blobStore, entry, blob, hash, filePath)
- await uploadBlobToAWS(entry, blob, filePath)
+ entry.ctx.recordCompletedBlob(hash) // mark upload as completed
} catch (err) {
entry.ctx.recordFailedBlob(hash)
throw err
@@ -481,76 +460,6 @@ async function uploadBlobToGCS(blobStore, entry, blob, hash, filePath) {
const GZ_SUFFIX = '.gz'
-/**
- * @param {QueueEntry} entry
- * @param {Blob} blob
- * @param {string} filePath
- * @return {Promise}
- */
-async function uploadBlobToAWS(entry, blob, filePath) {
- const { historyId } = entry.ctx
- let backupSource
- let contentEncoding
- const md5 = Crypto.createHash('md5')
- let size
- if (blob.getStringLength()) {
- const filePathCompressed = filePath + GZ_SUFFIX
- backupSource = filePathCompressed
- contentEncoding = 'gzip'
- size = 0
- await Stream.promises.pipeline(
- fs.createReadStream(filePath, { highWaterMark: STREAM_HIGH_WATER_MARK }),
- zLib.createGzip(),
- async function* (source) {
- for await (const chunk of source) {
- size += chunk.byteLength
- md5.update(chunk)
- yield chunk
- }
- },
- fs.createWriteStream(filePathCompressed, {
- highWaterMark: STREAM_HIGH_WATER_MARK,
- })
- )
- } else {
- backupSource = filePath
- size = blob.getByteLength()
- await Stream.promises.pipeline(
- fs.createReadStream(filePath, { highWaterMark: STREAM_HIGH_WATER_MARK }),
- md5
- )
- }
- const backendKeyPath = makeProjectKey(historyId, blob.getHash())
- const persistor = await entry.ctx.getCachedPersistor(backendKeyPath)
- try {
- STATS.writeToAWSCount++
- await persistor.sendStream(
- projectBlobsBucket,
- backendKeyPath,
- fs.createReadStream(backupSource, {
- highWaterMark: STREAM_HIGH_WATER_MARK,
- }),
- {
- contentEncoding,
- contentType: 'application/octet-stream',
- contentLength: size,
- sourceMd5: md5.digest('hex'),
- ifNoneMatch: '*', // de-duplicate write (we pay for the request, but avoid egress)
- }
- )
- STATS.writeToAWSEgress += size
- } catch (err) {
- if (err instanceof AlreadyWrittenError) {
- STATS.deduplicatedWriteToAWSRemoteCount++
- STATS.deduplicatedWriteToAWSRemoteEgress += size
- } else {
- STATS.writeToAWSEgress += size
- throw err
- }
- }
- entry.ctx.recordBackedUpBlob(blob.getHash())
-}
-
/**
* @param {Array} files
* @return {Promise}
@@ -636,30 +545,19 @@ async function queueNextBatch(batch, prefix = 'rootFolder.0') {
* @return {Promise}
*/
async function processBatch(batch, prefix = 'rootFolder.0') {
- const [deletedFiles, { nBlobs, blobs }, { nBackedUpBlobs, backedUpBlobs }] =
- await Promise.all([
- collectDeletedFiles(batch),
- collectProjectBlobs(batch),
- collectBackedUpBlobs(batch),
- ])
- const files = Array.from(
- findFileInBatch(batch, prefix, deletedFiles, blobs, backedUpBlobs)
- )
+ const { nBlobs, blobs } = await collectProjectBlobs(batch)
+ const files = Array.from(findFileInBatch(batch, prefix, blobs))
STATS.projects += batch.length
STATS.blobs += nBlobs
- STATS.backedUpBlobs += nBackedUpBlobs
// GC
batch.length = 0
- deletedFiles.clear()
blobs.clear()
- backedUpBlobs.clear()
// The files are currently ordered by project-id.
- // Order them by file-id ASC then blobs ASC to
- // - process files before blobs
- // - avoid head-of-line blocking from many project-files waiting on the generation of the projects DEK (round trip to AWS)
- // - bonus: increase chance of de-duplicating write to AWS
+ // Order them by file-id ASC then hash ASC to
+ // increase the hit rate on the "already processed
+ // hash for project" checks.
files.sort(
/**
* @param {QueueEntry} a
@@ -702,9 +600,7 @@ async function handleDeletedFileTreeBatch(batch) {
* @return {Promise}
*/
async function tryUpdateFileRefInMongo(entry) {
- if (entry.path === MONGO_PATH_DELETED_FILE) {
- return await tryUpdateDeletedFileRefInMongo(entry)
- } else if (entry.path.startsWith('project.')) {
+ if (entry.path.startsWith('project.')) {
return await tryUpdateFileRefInMongoInDeletedProject(entry)
}
@@ -721,22 +617,6 @@ async function tryUpdateFileRefInMongo(entry) {
return result.matchedCount === 1
}
-/**
- * @param {QueueEntry} entry
- * @return {Promise}
- */
-async function tryUpdateDeletedFileRefInMongo(entry) {
- STATS.mongoUpdates++
- const result = await deletedFilesCollection.updateOne(
- {
- _id: new ObjectId(entry.fileId),
- projectId: entry.ctx.projectId,
- },
- { $set: { hash: entry.hash } }
- )
- return result.matchedCount === 1
-}
-
/**
* @param {QueueEntry} entry
* @return {Promise}
@@ -801,7 +681,6 @@ async function updateFileRefInMongo(entry) {
break
}
if (!found) {
- if (await tryUpdateDeletedFileRefInMongo(entry)) return
STATS.fileHardDeleted++
console.warn('bug: file hard-deleted while processing', projectId, fileId)
return
@@ -894,60 +773,15 @@ function* findFiles(ctx, folder, path, isInputLoop = false) {
/**
* @param {Array} projects
* @param {string} prefix
- * @param {Map<string, Array<{ _id: ObjectId, hash?: string }>>} deletedFiles
 * @param {Map<string, Array<Blob>>} blobs
- * @param {Map<string, Array<string>>} backedUpBlobs
* @return Generator
*/
-function* findFileInBatch(
- projects,
- prefix,
- deletedFiles,
- blobs,
- backedUpBlobs
-) {
+function* findFileInBatch(projects, prefix, blobs) {
for (const project of projects) {
const projectIdS = project._id.toString()
const historyIdS = project.overleaf.history.id.toString()
const projectBlobs = blobs.get(historyIdS) || []
- const projectBackedUpBlobs = new Set(backedUpBlobs.get(projectIdS) || [])
- const projectDeletedFiles = deletedFiles.get(projectIdS) || []
- const ctx = new ProjectContext(
- project._id,
- historyIdS,
- projectBlobs,
- projectBackedUpBlobs
- )
- for (const fileRef of projectDeletedFiles) {
- const fileId = fileRef._id.toString()
- if (fileRef.hash) {
- if (ctx.canSkipProcessingHashedFile(fileRef.hash)) continue
- ctx.remainingQueueEntries++
- STATS.filesWithHash++
- yield {
- ctx,
- cacheKey: fileRef.hash,
- fileId,
- hash: fileRef.hash,
- path: MONGO_PATH_SKIP_WRITE_HASH_TO_FILE_TREE,
- }
- } else {
- ctx.remainingQueueEntries++
- STATS.filesWithoutHash++
- yield { ctx, cacheKey: fileId, fileId, path: MONGO_PATH_DELETED_FILE }
- }
- }
- for (const blob of projectBlobs) {
- if (projectBackedUpBlobs.has(blob.getHash())) continue
- ctx.remainingQueueEntries++
- yield {
- ctx,
- cacheKey: blob.getHash(),
- path: MONGO_PATH_SKIP_WRITE_HASH_TO_FILE_TREE,
- blob,
- hash: blob.getHash(),
- }
- }
+ const ctx = new ProjectContext(project._id, historyIdS, projectBlobs)
try {
yield* findFiles(ctx, project.rootFolder?.[0], prefix, true)
} catch (err) {
@@ -970,78 +804,11 @@ async function collectProjectBlobs(batch) {
return await getProjectBlobsBatch(batch.map(p => p.overleaf.history.id))
}
-/**
- * @param {Array} projects
- * @return {Promise<Map<string, Array<{ _id: ObjectId, hash?: string }>>>}
- */
-async function collectDeletedFiles(projects) {
- const deletedFiles = new Map()
- if (!PROCESS_DELETED_FILES) return deletedFiles
-
- const cursor = deletedFilesCollection.find(
- {
- projectId: { $in: projects.map(p => p._id) },
- ...(PROCESS_HASHED_FILES
- ? {}
- : {
- hash: { $exists: false },
- }),
- },
- {
- projection: { _id: 1, projectId: 1, hash: 1 },
- readPreference: READ_PREFERENCE_SECONDARY,
- sort: { projectId: 1 },
- }
- )
- for await (const deletedFileRef of cursor) {
- const projectId = deletedFileRef.projectId.toString()
- const found = deletedFiles.get(projectId)
- if (found) {
- found.push(deletedFileRef)
- } else {
- deletedFiles.set(projectId, [deletedFileRef])
- }
- }
- return deletedFiles
-}
-
-/**
- * @param {Array} projects
- * @return {Promise<{nBackedUpBlobs:number,backedUpBlobs:Map<string, Array<string>>}>}
- */
-async function collectBackedUpBlobs(projects) {
- let nBackedUpBlobs = 0
- const backedUpBlobs = new Map()
- if (!COLLECT_BACKED_UP_BLOBS) return { nBackedUpBlobs, backedUpBlobs }
-
- const cursor = backedUpBlobsCollection.find(
- { _id: { $in: projects.map(p => p._id) } },
- {
- readPreference: READ_PREFERENCE_SECONDARY,
- sort: { _id: 1 },
- }
- )
- for await (const record of cursor) {
- const blobs = record.blobs.map(b => b.toString('hex'))
- backedUpBlobs.set(record._id.toString(), blobs)
- nBackedUpBlobs += blobs.length
- }
- return { nBackedUpBlobs, backedUpBlobs }
-}
-
-const BATCH_HASH_WRITES = 1_000
const BATCH_FILE_UPDATES = 100
-const MONGO_PATH_DELETED_FILE = 'deleted-file'
const MONGO_PATH_SKIP_WRITE_HASH_TO_FILE_TREE = 'skip-write-to-file-tree'
class ProjectContext {
- /** @type {Promise | null} */
- #cachedPersistorPromise = null
-
- /** @type {Set} */
- #backedUpBlobs
-
/** @type {Map} */
#historyBlobs
@@ -1055,12 +822,10 @@ class ProjectContext {
* @param {ObjectId} projectId
* @param {string} historyId
* @param {Array} blobs
- * @param {Set} backedUpBlobs
*/
- constructor(projectId, historyId, blobs, backedUpBlobs) {
+ constructor(projectId, historyId, blobs) {
this.projectId = projectId
this.historyId = historyId
- this.#backedUpBlobs = backedUpBlobs
this.#historyBlobs = new Map(blobs.map(b => [b.getHash(), b]))
}
@@ -1089,75 +854,17 @@ class ProjectContext {
return false
}
- /**
- * @param {string} key
- * @return {Promise}
- */
- getCachedPersistor(key) {
- if (!this.#cachedPersistorPromise) {
- // Fetch DEK once, but only if needed -- upon the first use
- this.#cachedPersistorPromise = this.#getCachedPersistorWithRetries(key)
- }
- return this.#cachedPersistorPromise
- }
-
- /**
- * @param {string} key
- * @return {Promise}
- */
- async #getCachedPersistorWithRetries(key) {
- // Optimization: Skip GET on DEK in case no blobs are marked as backed up yet.
- let tryGenerateDEKFirst = this.#backedUpBlobs.size === 0
- for (let attempt = 0; attempt < RETRIES; attempt++) {
- try {
- if (tryGenerateDEKFirst) {
- try {
- return await backupPersistor.generateDataEncryptionKey(
- projectBlobsBucket,
- key
- )
- } catch (err) {
- if (err instanceof AlreadyWrittenError) {
- tryGenerateDEKFirst = false
- // fall back to GET below
- } else {
- throw err
- }
- }
- }
- return await backupPersistor.forProject(projectBlobsBucket, key)
- } catch (err) {
- if (gracefulShutdownInitiated) throw err
- if (err instanceof NoKEKMatchedError) {
- throw err
- } else {
- logger.warn(
- { err, projectId: this.projectId, attempt },
- 'failed to get DEK, trying again'
- )
- const jitter = Math.random() * RETRY_DELAY_MS
- await setTimeout(RETRY_DELAY_MS + jitter)
- }
- }
- }
- return await backupPersistor.forProject(projectBlobsBucket, key)
- }
-
async flushMongoQueuesIfNeeded() {
if (this.remainingQueueEntries === 0) {
await this.flushMongoQueues()
}
- if (this.#completedBlobs.size > BATCH_HASH_WRITES) {
- await this.#storeBackedUpBlobs()
- }
if (this.#pendingFileWrites.length > BATCH_FILE_UPDATES) {
await this.#storeFileHashes()
}
}
async flushMongoQueues() {
- await this.#storeBackedUpBlobs()
await this.#storeFileHashes()
}
@@ -1166,20 +873,6 @@ class ProjectContext {
/** @type {Set} */
#completedBlobs = new Set()
- async #storeBackedUpBlobs() {
- if (this.#completedBlobs.size === 0) return
- const blobs = Array.from(this.#completedBlobs).map(
- hash => new Binary(Buffer.from(hash, 'hex'))
- )
- this.#completedBlobs.clear()
- STATS.mongoUpdates++
- await backedUpBlobsCollection.updateOne(
- { _id: this.projectId },
- { $addToSet: { blobs: { $each: blobs } } },
- { upsert: true }
- )
- }
-
/**
* @param {string} hash
*/
@@ -1197,8 +890,7 @@ class ProjectContext {
/**
* @param {string} hash
*/
- recordBackedUpBlob(hash) {
- this.#backedUpBlobs.add(hash)
+ recordCompletedBlob(hash) {
this.#completedBlobs.add(hash)
this.#pendingBlobs.delete(hash)
}
@@ -1207,12 +899,8 @@ class ProjectContext {
* @param {string} hash
* @return {boolean}
*/
- hasBackedUpBlob(hash) {
- return (
- this.#pendingBlobs.has(hash) ||
- this.#completedBlobs.has(hash) ||
- this.#backedUpBlobs.has(hash)
- )
+ hasCompletedBlob(hash) {
+ return this.#pendingBlobs.has(hash) || this.#completedBlobs.has(hash)
}
/** @type {Array} */
@@ -1253,9 +941,7 @@ class ProjectContext {
const projectEntries = []
const deletedProjectEntries = []
for (const entry of this.#pendingFileWrites) {
- if (entry.path === MONGO_PATH_DELETED_FILE) {
- individualUpdates.push(entry)
- } else if (entry.path.startsWith('project.')) {
+ if (entry.path.startsWith('project.')) {
deletedProjectEntries.push(entry)
} else {
projectEntries.push(entry)
@@ -1316,6 +1002,51 @@ function estimateBlobSize(blob) {
return size
}
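+/**
+ * Process the newline-separated project ids in the --projectIdsFrom file:
+ * look each project up (falling back to deletedProjects for soft-deleted
+ * projects) and queue it for hash back-filling.
+ */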
+async function processProjectsFromFile() {
+ const rl = readline.createInterface({
+ input: fs.createReadStream(PROJECT_IDS_FROM),
+ })
+ for await (const projectId of rl) {
+ if (!projectId) continue // skip over trailing new line
+ let project = await typedProjectsCollection.findOne(
+ { _id: new ObjectId(projectId) },
+ { projection: { rootFolder: 1, _id: 1, 'overleaf.history.id': 1 } }
+ )
+ let prefix = 'rootFolder.0'
+ if (!project) {
+ const deletedProject = await typedDeletedProjectsCollection.findOne(
+ { 'deleterData.deletedProjectId': new ObjectId(projectId) },
+ {
+ projection: {
+ 'project.rootFolder': 1,
+ 'project._id': 1,
+ 'project.overleaf.history.id': 1,
+ },
+ }
+ )
+ if (!deletedProject?.project) {
+ logger.warn({ projectId }, 'project hard-deleted')
+ continue
+ }
+ project = deletedProject.project
+ prefix = 'project.rootFolder.0'
+ }
+ if (!project?.overleaf?.history?.id) {
+ logger.warn({ projectId }, 'project has no history id')
+ continue
+ }
+ try {
+ await queueNextBatch([project], prefix)
+ } catch (err) {
+ gracefulShutdownInitiated = true
+ await waitForDeferredQueues()
+ throw err
+ }
+ }
+ await waitForDeferredQueues()
+ console.warn('Done updating projects from input file')
+}
+
async function processNonDeletedProjects() {
try {
await batchedUpdate(
@@ -1366,12 +1097,23 @@ async function processDeletedProjects() {
}
async function main() {
+ console.log('Starting file hash back-fill...')
await loadGlobalBlobs()
- if (PROCESS_NON_DELETED_PROJECTS) {
- await processNonDeletedProjects()
- }
- if (PROCESS_DELETED_PROJECTS) {
- await processDeletedProjects()
+ console.log('Loaded global blobs:', GLOBAL_BLOBS.size)
+ if (PROJECT_IDS_FROM) {
+ console.log(
+ `Processing projects from file: ${PROJECT_IDS_FROM}, this may take a while...`
+ )
+ await processProjectsFromFile()
+ } else {
+ if (PROCESS_NON_DELETED_PROJECTS) {
+ console.log('Processing non-deleted projects...')
+ await processNonDeletedProjects()
+ }
+ if (PROCESS_DELETED_PROJECTS) {
+ console.log('Processing deleted projects...')
+ await processDeletedProjects()
+ }
}
console.warn('Done.')
}
@@ -1407,8 +1149,10 @@ try {
)
code++
}
+ await setTimeout(SLEEP_BEFORE_EXIT)
process.exit(code)
} catch (err) {
console.error(err)
+ await setTimeout(SLEEP_BEFORE_EXIT)
process.exit(1)
}
diff --git a/services/history-v1/storage/scripts/back_fill_file_hash_fix_up.mjs b/services/history-v1/storage/scripts/back_fill_file_hash_fix_up.mjs
new file mode 100644
index 0000000000..2525ee1d6e
--- /dev/null
+++ b/services/history-v1/storage/scripts/back_fill_file_hash_fix_up.mjs
@@ -0,0 +1,629 @@
+// @ts-check
+import Events from 'node:events'
+import fs from 'node:fs'
+import Stream from 'node:stream'
+import { ObjectId } from 'mongodb'
+import logger from '@overleaf/logger'
+import OError from '@overleaf/o-error'
+import { Blob } from 'overleaf-editor-core'
+import {
+ BlobStore,
+ getStringLengthOfFile,
+ makeBlobForFile,
+} from '../lib/blob_store/index.js'
+import { db } from '../lib/mongodb.js'
+import commandLineArgs from 'command-line-args'
+import readline from 'node:readline'
+import { NotFoundError } from '@overleaf/object-persistor/src/Errors.js'
+import { setTimeout } from 'node:timers/promises'
+
+// Silence warning.
+Events.setMaxListeners(20)
+
+// Enable caching for ObjectId.toString()
+ObjectId.cacheHexString = true
+
+/**
+ * @typedef {import("mongodb").Collection} Collection
+ * @typedef {import("mongodb").Collection} ProjectsCollection
+ * @typedef {import("mongodb").Collection<{project: Project}>} DeletedProjectsCollection
+ */
+
+/**
+ * @typedef {Object} FileRef
+ * @property {ObjectId} _id
+ * @property {string} hash
+ */
+
+/**
+ * @typedef {Object} Folder
+ * @property {Array} folders
+ * @property {Array} fileRefs
+ */
+
+/**
+ * @typedef {Object} Project
+ * @property {ObjectId} _id
+ * @property {Array} rootFolder
+ * @property {{history: {id: (number|string)}}} overleaf
+ */
+
+/**
+ * @return {{FIX_NOT_FOUND: boolean, FIX_HASH_MISMATCH: boolean, FIX_MISSING_HASH: boolean, LOGS: string}}
+ */
+function parseArgs() {
+ const args = commandLineArgs([
+ { name: 'fixNotFound', type: String, defaultValue: 'true' },
+ { name: 'fixHashMismatch', type: String, defaultValue: 'true' },
+ { name: 'fixMissingHash', type: String, defaultValue: 'true' },
+ { name: 'logs', type: String, defaultValue: '' },
+ ])
+ /**
+ * commandLineArgs cannot handle --foo=false, so go the long way
+ * @param {string} name
+ * @return {boolean}
+ */
+ function boolVal(name) {
+ const v = args[name]
+ if (['true', 'false'].includes(v)) return v === 'true'
+ throw new Error(`expected "true" or "false" for boolean option ${name}`)
+ }
+ return {
+ FIX_HASH_MISMATCH: boolVal('fixHashMismatch'),
+ FIX_NOT_FOUND: boolVal('fixNotFound'),
+ FIX_MISSING_HASH: boolVal('fixMissingHash'),
+ LOGS: args.logs,
+ }
+}
+
+const { FIX_HASH_MISMATCH, FIX_NOT_FOUND, FIX_MISSING_HASH, LOGS } = parseArgs()
+if (!LOGS) {
+ throw new Error('--logs parameter missing')
+}
+const BUFFER_DIR = fs.mkdtempSync(
+ process.env.BUFFER_DIR_PREFIX || '/tmp/back_fill_file_hash-'
+)
+const USER_FILES_BUCKET_NAME = process.env.USER_FILES_BUCKET_NAME || ''
+if (!USER_FILES_BUCKET_NAME) {
+ throw new Error('env var USER_FILES_BUCKET_NAME is missing')
+}
+// https://nodejs.org/api/stream.html#streamgetdefaulthighwatermarkobjectmode
+const STREAM_HIGH_WATER_MARK = parseInt(
+ process.env.STREAM_HIGH_WATER_MARK || (64 * 1024).toString(),
+ 10
+)
+const SLEEP_BEFORE_EXIT = parseInt(process.env.SLEEP_BEFORE_EXIT || '1000', 10)
+
+// Filestore endpoint location
+const FILESTORE_HOST = process.env.FILESTORE_HOST || '127.0.0.1'
+const FILESTORE_PORT = process.env.FILESTORE_PORT || '3009'
+
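+/**
+ * Stream a file from the filestore service over HTTP.
+ *
+ * @param {string} projectId
+ * @param {string} fileId
+ * @return {Promise<ReadableStream>} the response body stream
+ * @throws {NotFoundError} when filestore responds with a 404
+ */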
+async function fetchFromFilestore(projectId, fileId) {
+ const url = `http://${FILESTORE_HOST}:${FILESTORE_PORT}/project/${projectId}/file/${fileId}`
+ const response = await fetch(url)
+ if (!response.ok) {
+ if (response.status === 404) {
+ throw new NotFoundError('file not found in filestore', {
+ status: response.status,
+ })
+ }
+ const body = await response.text()
+ throw new OError('fetchFromFilestore failed', {
+ projectId,
+ fileId,
+ status: response.status,
+ body,
+ })
+ }
+ if (!response.body) {
+ throw new OError('fetchFromFilestore response has no body', {
+ projectId,
+ fileId,
+ status: response.status,
+ })
+ }
+ return response.body
+}
+
+/** @type {ProjectsCollection} */
+const projectsCollection = db.collection('projects')
+/** @type {DeletedProjectsCollection} */
+const deletedProjectsCollection = db.collection('deletedProjects')
+
+let gracefulShutdownInitiated = false
+
+process.on('SIGINT', handleSignal)
+process.on('SIGTERM', handleSignal)
+
+function handleSignal() {
+ gracefulShutdownInitiated = true
+ console.warn('graceful shutdown initiated, draining queue')
+}
+
+class FileDeletedError extends OError {}
+
+/** @type {Map} */
+const PROJECT_CACHE = new Map()
+
+/**
+ * @param {string} projectId
+ * @return {Promise<{project: Project, projectSoftDeleted: boolean}>}
+ */
+async function getProject(projectId) {
+ const cached = PROJECT_CACHE.get(projectId)
+ if (cached) return cached
+
+ let projectSoftDeleted
+ let project = await projectsCollection.findOne({
+ _id: new ObjectId(projectId),
+ })
+ if (project) {
+ projectSoftDeleted = false
+ } else {
+ const softDeleted = await deletedProjectsCollection.findOne({
+ 'deleterData.deletedProjectId': new ObjectId(projectId),
+ project: { $exists: true },
+ })
+ if (!softDeleted) {
+ throw new OError('project hard-deleted')
+ }
+ project = softDeleted.project
+ projectSoftDeleted = true
+ }
+ PROJECT_CACHE.set(projectId, { projectSoftDeleted, project })
+ return { projectSoftDeleted, project }
+}
+
+/**
+ * @param {Folder} folder
+ * @param {string} fileId
+ * @return {{path: string, fileRef: FileRef, folder: Folder}|null}
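+ *
+ * @example
+ * // Illustrative result shape for a file nested one folder deep:
+ * // getFileTreePath(rootFolder, fileId)
+ * // => { path: '.folders.0.fileRefs.2', fileRef, folder }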
+ */
+function getFileTreePath(folder, fileId) {
+ if (!folder) return null
+ let idx = 0
+ if (Array.isArray(folder.fileRefs)) {
+ for (const fileRef of folder.fileRefs) {
+ if (fileRef?._id.toString() === fileId) {
+ return {
+ fileRef,
+ path: `.fileRefs.${idx}`,
+ folder,
+ }
+ }
+ idx++
+ }
+ }
+ idx = 0
+ if (Array.isArray(folder.folders)) {
+ for (const child of folder.folders) {
+ const match = getFileTreePath(child, fileId)
+ if (match) {
+ return {
+ fileRef: match.fileRef,
+ folder: match.folder,
+ path: `.folders.${idx}${match.path}`,
+ }
+ }
+ idx++
+ }
+ }
+ return null
+}
+
+/**
+ * @param {string} projectId
+ * @param {string} fileId
+ * @return {Promise<{fileRef: FileRef, folder: Folder, fullPath: string, query: Object, projectSoftDeleted: boolean}>}
+ */
+async function findFile(projectId, fileId) {
+ const { projectSoftDeleted, project } = await getProject(projectId)
+ const match = getFileTreePath(project.rootFolder[0], fileId)
+ if (!match) {
+ throw new FileDeletedError('file not found in file-tree', {
+ projectSoftDeleted,
+ })
+ }
+ const { path, fileRef, folder } = match
+ let fullPath
+ let query
+ if (projectSoftDeleted) {
+ fullPath = `project.rootFolder.0${path}`
+ query = {
+ 'deleterData.deletedProjectId': new ObjectId(projectId),
+ [`${fullPath}._id`]: new ObjectId(fileId),
+ }
+ } else {
+ fullPath = `rootFolder.0${path}`
+ query = {
+ _id: new ObjectId(projectId),
+ [`${fullPath}._id`]: new ObjectId(fileId),
+ }
+ }
+ return {
+ projectSoftDeleted,
+ query,
+ fullPath,
+ fileRef,
+ folder,
+ }
+}
+
+/**
+ * @param {string} line
+ * @return {Promise}
+ */
+async function fixNotFound(line) {
+ const { projectId, fileId, bucketName } = JSON.parse(line)
+ if (bucketName !== USER_FILES_BUCKET_NAME) {
+ throw new OError('not found case for another bucket')
+ }
+
+ const { projectSoftDeleted, query, fullPath, fileRef, folder } =
+ await findFile(projectId, fileId)
+ logger.info({ projectId, fileId, fileRef }, 'removing fileRef')
+ // Copied from _removeElementFromMongoArray (https://github.com/overleaf/internal/blob/11e09528c153de6b7766d18c3c90d94962190371/services/web/app/src/Features/Project/ProjectEntityMongoUpdateHandler.js)
+ const nonArrayPath = fullPath.slice(0, fullPath.lastIndexOf('.'))
+ let result
+ if (projectSoftDeleted) {
+ result = await deletedProjectsCollection.updateOne(query, {
+ $pull: { [nonArrayPath]: { _id: new ObjectId(fileId) } },
+ $inc: { 'project.version': 1 },
+ })
+ } else {
+ result = await projectsCollection.updateOne(query, {
+ $pull: { [nonArrayPath]: { _id: new ObjectId(fileId) } },
+ $inc: { version: 1 },
+ })
+ }
+ if (result.matchedCount !== 1) {
+ throw new OError('file-tree write did not match', { result })
+ }
+ // Update the cache. The mongo-path of the next file will be off otherwise.
+ folder.fileRefs = folder.fileRefs.filter(f => !f._id.equals(fileId))
+ return true
+}
+
+/**
+ * @param {string} projectId
+ * @param {string} fileId
+ * @param {string} hash
+ * @return {Promise}
+ */
+async function setHashInMongo(projectId, fileId, hash) {
+ const { projectSoftDeleted, query, fullPath, fileRef } = await findFile(
+ projectId,
+ fileId
+ )
+ if (fileRef.hash === hash) return
+ logger.info({ projectId, fileId, fileRef, hash }, 'setting fileRef hash')
+ let result
+ if (projectSoftDeleted) {
+ result = await deletedProjectsCollection.updateOne(query, {
+ $set: { [`${fullPath}.hash`]: hash },
+ $inc: { 'project.version': 1 },
+ })
+ } else {
+ result = await projectsCollection.updateOne(query, {
+ $set: { [`${fullPath}.hash`]: hash },
+ $inc: { version: 1 },
+ })
+ }
+ if (result.matchedCount !== 1) {
+ throw new OError('file-tree write did not match', { result })
+ }
+ fileRef.hash = hash // Update cache for completeness.
+}
+
+/**
+ * @param {string} projectId
+ * @param {string} fileId
+ * @param {string} historyId
+ * @return {Promise}
+ */
+async function importRestoredFilestoreFile(projectId, fileId, historyId) {
+ const path = `${BUFFER_DIR}/${projectId}_${fileId}`
+ try {
+ let s
+ try {
+ s = await fetchFromFilestore(projectId, fileId)
+ } catch (err) {
+ if (err instanceof NotFoundError) {
+ throw new OError('missing blob, need to restore filestore file', {
+ projectId,
+ fileId,
+ })
+ }
+ throw err
+ }
+ await Stream.promises.pipeline(
+ s,
+ fs.createWriteStream(path, { highWaterMark: STREAM_HIGH_WATER_MARK })
+ )
+ const blobStore = new BlobStore(historyId)
+ const blob = await blobStore.putFile(path)
+ await setHashInMongo(projectId, fileId, blob.getHash())
+ } finally {
+ await fs.promises.rm(path, { force: true })
+ }
+}
+
+/**
+ * @param {string} projectId
+ * @param {string} fileId
+ * @param {string} path
+ * @return {Promise}
+ */
+async function bufferFilestoreFileToDisk(projectId, fileId, path) {
+ try {
+ await Stream.promises.pipeline(
+ await fetchFromFilestore(projectId, fileId),
+ fs.createWriteStream(path, { highWaterMark: STREAM_HIGH_WATER_MARK })
+ )
+ const blob = await makeBlobForFile(path)
+ blob.setStringLength(
+ await getStringLengthOfFile(blob.getByteLength(), path)
+ )
+ return blob
+ } catch (err) {
+ if (err instanceof NotFoundError) {
+ throw new OError('missing blob, need to restore filestore file', {
+ projectId,
+ fileId,
+ })
+ }
+ throw err
+ }
+}
+
+/**
+ * @param {string} projectId
+ * @param {string} fileId
+ * @return {Promise}
+ */
+async function computeFilestoreFileHash(projectId, fileId) {
+ const path = `${BUFFER_DIR}/${projectId}_${fileId}`
+ try {
+ const blob = await bufferFilestoreFileToDisk(projectId, fileId, path)
+ return blob.getHash()
+ } finally {
+ await fs.promises.rm(path, { force: true })
+ }
+}
+
+/**
+ * @param {string} projectId
+ * @param {string} fileId
+ * @return {Promise}
+ */
+async function uploadFilestoreFile(projectId, fileId) {
+ const path = `${BUFFER_DIR}/${projectId}_${fileId}`
+ try {
+ const blob = await bufferFilestoreFileToDisk(projectId, fileId, path)
+ const hash = blob.getHash()
+ try {
+ await ensureBlobExistsForFile(projectId, fileId, hash)
+ } catch (err) {
+ if (!(err instanceof Blob.NotFoundError)) throw err
+
+ const { project } = await getProject(projectId)
+ const historyId = project.overleaf.history.id.toString()
+ const blobStore = new BlobStore(historyId)
+ await blobStore.putBlob(path, blob)
+ await ensureBlobExistsForFile(projectId, fileId, hash)
+ }
+ } finally {
+ await fs.promises.rm(path, { force: true })
+ }
+}
+
+/**
+ * @param {string} line
+ * @return {Promise}
+ */
+async function fixHashMismatch(line) {
+ const {
+ projectId,
+ fileId,
+ hash: computedHash,
+ entry: {
+ hash: fileTreeHash,
+ ctx: { historyId },
+ },
+ } = JSON.parse(line)
+ const blobStore = new BlobStore(historyId)
+ if (await blobStore.getBlob(fileTreeHash)) {
+ throw new OError('found blob with computed filestore object hash')
+ }
+ if (!(await blobStore.getBlob(computedHash))) {
+ await importRestoredFilestoreFile(projectId, fileId, historyId)
+ return true
+ }
+ return await ensureBlobExistsForFile(projectId, fileId, computedHash)
+}
+
+/**
+ * @param {string} projectId
+ * @param {string} fileId
+ * @param {string} hash
+ * @return {Promise}
+ */
+async function hashAlreadyUpdatedInFileTree(projectId, fileId, hash) {
+ const { fileRef } = await findFile(projectId, fileId)
+ return fileRef.hash === hash
+}
+
+/**
+ * @param {string} projectId
+ * @param {string} fileId
+ * @param {string} hash
+ * @return {Promise}
+ */
+async function ensureBlobExistsForFile(projectId, fileId, hash) {
+ const { project } = await getProject(projectId)
+ const historyId = project.overleaf.history.id.toString()
+ const blobStore = new BlobStore(historyId)
+ if (
+ (await hashAlreadyUpdatedInFileTree(projectId, fileId, hash)) &&
+ (await blobStore.getBlob(hash))
+ ) {
+ return false // already processed
+ }
+
+ const stream = await blobStore.getStream(hash)
+ const path = `${BUFFER_DIR}/${historyId}_${hash}`
+ try {
+ await Stream.promises.pipeline(
+ stream,
+ fs.createWriteStream(path, {
+ highWaterMark: STREAM_HIGH_WATER_MARK,
+ })
+ )
+
+ const writtenBlob = await makeBlobForFile(path)
+ writtenBlob.setStringLength(
+ await getStringLengthOfFile(writtenBlob.getByteLength(), path)
+ )
+ if (writtenBlob.getHash() !== hash) {
+ // Double check download, better safe than sorry.
+ throw new OError('blob corrupted', { writtenBlob, hash })
+ }
+
+ let blob = await blobStore.getBlob(hash)
+ if (!blob) {
+ // Calling blobStore.putBlob would result in the same error again.
+ // HACK: Skip upload to GCS and finalize putBlob operation directly.
+ await blobStore.backend.insertBlob(historyId, writtenBlob)
+ }
+ } finally {
+ await fs.promises.rm(path, { force: true })
+ }
+ await setHashInMongo(projectId, fileId, hash)
+ return true
+}
+
+/**
+ * @param {string} line
+ * @return {Promise}
+ */
+async function fixMissingHash(line) {
+ let { projectId, _id: fileId } = JSON.parse(line)
+ const {
+ fileRef: { hash },
+ } = await findFile(projectId, fileId)
+ if (hash) {
+ // processed, double check
+ return await ensureBlobExistsForFile(projectId, fileId, hash)
+ }
+ await uploadFilestoreFile(projectId, fileId)
+ return true
+}
+
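+/**
+ * Failure categories from the back-fill logs, mapped to the log substring
+ * used to detect them, the command-line flag that enables the fix, and the
+ * fix-up action to run.
+ */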
+const CASES = {
+ 'not found': {
+ match: 'NotFoundError',
+ flag: FIX_NOT_FOUND,
+ action: fixNotFound,
+ },
+ 'hash mismatch': {
+ match: 'OError: hash mismatch',
+ flag: FIX_HASH_MISMATCH,
+ action: fixHashMismatch,
+ },
+ 'missing file hash': {
+ match: '"bad file hash"',
+ flag: FIX_MISSING_HASH,
+ action: fixMissingHash,
+ },
+}
+
+const STATS = {
+ processedLines: 0,
+ success: 0,
+ alreadyProcessed: 0,
+ fileDeleted: 0,
+ skipped: 0,
+ failed: 0,
+ unmatched: 0,
+}
+function logStats() {
+ console.log(
+ JSON.stringify({
+ time: new Date(),
+ gracefulShutdownInitiated,
+ ...STATS,
+ })
+ )
+}
+setInterval(logStats, 10_000)
+
+async function processLog() {
+ const rl = readline.createInterface({
+ input: fs.createReadStream(LOGS),
+ })
+ nextLine: for await (const line of rl) {
+ if (gracefulShutdownInitiated) break
+ STATS.processedLines++
+ if (
+ !(
+ line.includes('"failed to process file"') ||
+ // Process missing hashes as flagged by find_malformed_filetrees.mjs
+ line.includes('"bad file-tree path"')
+ )
+ ) {
+ continue
+ }
+
+ for (const [name, { match, flag, action }] of Object.entries(CASES)) {
+ if (!line.includes(match)) continue
+ if (flag) {
+ try {
+ if (await action(line)) {
+ STATS.success++
+ } else {
+ STATS.alreadyProcessed++
+ }
+ } catch (err) {
+ if (err instanceof FileDeletedError) {
+ STATS.fileDeleted++
+ logger.info({ err, line }, 'file deleted, skipping')
+ } else {
+ STATS.failed++
+ logger.error({ err, line }, `failed to fix ${name}`)
+ }
+ }
+ } else {
+ STATS.skipped++
+ }
+ continue nextLine
+ }
+ STATS.unmatched++
+ logger.warn({ line }, 'unknown fatal error')
+ }
+}
+
+async function main() {
+ try {
+ await processLog()
+ } finally {
+ logStats()
+ try {
+ await fs.promises.rm(BUFFER_DIR, { recursive: true, force: true })
+ } catch (err) {
+ console.error(`Cleanup of BUFFER_DIR=${BUFFER_DIR} failed`, err)
+ }
+ }
+ const { skipped, failed, unmatched } = STATS
+ await setTimeout(SLEEP_BEFORE_EXIT)
+ if (failed > 0) {
+ process.exit(Math.min(failed, 99))
+ } else if (unmatched > 0) {
+ process.exit(100)
+ } else if (skipped > 0) {
+ process.exit(101)
+ } else {
+ process.exit(0)
+ }
+}
+
+await main()
diff --git a/services/history-v1/storage/scripts/backup.mjs b/services/history-v1/storage/scripts/backup.mjs
new file mode 100644
index 0000000000..8cbbadfe12
--- /dev/null
+++ b/services/history-v1/storage/scripts/backup.mjs
@@ -0,0 +1,1104 @@
+// @ts-check
+
+import logger from '@overleaf/logger'
+import commandLineArgs from 'command-line-args'
+import { Chunk, History, Snapshot } from 'overleaf-editor-core'
+import {
+ getProjectChunks,
+ getLatestChunkMetadata,
+ create,
+} from '../lib/chunk_store/index.js'
+import { client } from '../lib/mongodb.js'
+import redis from '../lib/redis.js'
+import knex from '../lib/knex.js'
+import { historyStore } from '../lib/history_store.js'
+import pLimit from 'p-limit'
+import {
+ GLOBAL_BLOBS,
+ loadGlobalBlobs,
+ makeProjectKey,
+ BlobStore,
+} from '../lib/blob_store/index.js'
+import {
+ listPendingBackups,
+ getBackupStatus,
+ setBackupVersion,
+ updateCurrentMetadataIfNotSet,
+ updatePendingChangeTimestamp,
+ getBackedUpBlobHashes,
+ unsetBackedUpBlobHashes,
+} from '../lib/backup_store/index.js'
+import { backupBlob, downloadBlobToDir } from '../lib/backupBlob.mjs'
+import {
+ backupPersistor,
+ chunksBucket,
+ projectBlobsBucket,
+} from '../lib/backupPersistor.mjs'
+import { backupGenerator } from '../lib/backupGenerator.mjs'
+import { promises as fs, createWriteStream } from 'node:fs'
+import os from 'node:os'
+import path from 'node:path'
+import projectKey from '../lib/project_key.js'
+import Crypto from 'node:crypto'
+import Stream from 'node:stream'
+import { EventEmitter } from 'node:events'
+import {
+ objectIdFromInput,
+ batchedUpdate,
+ READ_PREFERENCE_SECONDARY,
+} from '@overleaf/mongo-utils/batchedUpdate.js'
+import { createGunzip } from 'node:zlib'
+import { text } from 'node:stream/consumers'
+import { fromStream as blobHashFromStream } from '../lib/blob_hash.js'
+import { NotFoundError } from '@overleaf/object-persistor/src/Errors.js'
+
+// Create a singleton promise that loads global blobs once
+let globalBlobsPromise = null
+function ensureGlobalBlobsLoaded() {
+ if (!globalBlobsPromise) {
+ globalBlobsPromise = loadGlobalBlobs()
+ }
+ return globalBlobsPromise
+}
+
+EventEmitter.defaultMaxListeners = 20
+
+logger.initialize('history-v1-backup')
+
+// Settings shared between command-line and module usage
+let DRY_RUN = false
+let RETRY_LIMIT = 3
+const RETRY_DELAY = 1000
+let CONCURRENCY = 4
+let BATCH_CONCURRENCY = 1
+let BLOB_LIMITER = pLimit(CONCURRENCY)
+let USE_SECONDARY = false
+
+/**
+ * Configure backup settings
+ * @param {Object} options Backup configuration options
+ */
+export function configureBackup(options = {}) {
+ DRY_RUN = options.dryRun || false
+ RETRY_LIMIT = options.retries || 3
+ CONCURRENCY = options.concurrency || 1
+ BATCH_CONCURRENCY = options.batchConcurrency || 1
+ BLOB_LIMITER = pLimit(CONCURRENCY)
+ USE_SECONDARY = options.useSecondary || false
+}
+
+let gracefulShutdownInitiated = false
+
+process.on('SIGINT', handleSignal)
+process.on('SIGTERM', handleSignal)
+
+function handleSignal() {
+ gracefulShutdownInitiated = true
+ logger.info({}, 'graceful shutdown initiated, draining queue')
+}
+
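+/**
+ * Retry an async function up to `times` attempts, waiting `delayMs` between
+ * attempts and rethrowing the last error once attempts are exhausted.
+ */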
+async function retry(fn, times, delayMs) {
+ let attempts = times
+ while (attempts > 0) {
+ try {
+ const result = await fn()
+ return result
+ } catch (err) {
+ attempts--
+ if (attempts === 0) throw err
+ await new Promise(resolve => setTimeout(resolve, delayMs))
+ }
+ }
+}
+
+function wrapWithRetry(fn, retries, delayMs) {
+ return async (...args) => {
+ const result = await retry(() => fn(...args), retries, delayMs)
+ return result
+ }
+}
+
+const downloadWithRetry = wrapWithRetry(
+ downloadBlobToDir,
+ RETRY_LIMIT,
+ RETRY_DELAY
+)
+// FIXME: this creates a new backupPersistor for each blob
+// so there is no caching of the DEK
+const backupWithRetry = wrapWithRetry(backupBlob, RETRY_LIMIT, RETRY_DELAY)
+
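+/**
+ * Filter out blobs that are already recorded as backed up for this project,
+ * as well as global blobs that have not been demoted.
+ */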
+async function findNewBlobs(projectId, blobs) {
+ const newBlobs = []
+ const existingBackedUpBlobHashes = await getBackedUpBlobHashes(projectId)
+ for (const blob of blobs) {
+ const hash = blob.getHash()
+ if (existingBackedUpBlobHashes.has(blob.getHash())) {
+ logger.debug({ projectId, hash }, 'Blob is already backed up, skipping')
+ continue
+ }
+ const globalBlob = GLOBAL_BLOBS.get(hash)
+ if (globalBlob && !globalBlob.demoted) {
+ logger.debug(
+ { projectId, hash },
+ 'Blob is a global blob and not demoted, skipping'
+ )
+ continue
+ }
+ newBlobs.push(blob)
+ }
+ return newBlobs
+}
+
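+/**
+ * Remove the given blob hashes from the project's backed-up blobs record
+ * (or just log what would be removed in dry-run mode).
+ */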
+async function cleanBackedUpBlobs(projectId, blobs) {
+ const hashes = blobs.map(blob => blob.getHash())
+ if (DRY_RUN) {
+ console.log(
+ 'Would remove blobs',
+ hashes.join(' '),
+ 'from project',
+ projectId
+ )
+ return
+ }
+ await unsetBackedUpBlobHashes(projectId, hashes)
+}
+
+async function backupSingleBlob(projectId, historyId, blob, tmpDir, persistor) {
+ if (DRY_RUN) {
+ console.log(
+ 'Would back up blob',
+ JSON.stringify(blob),
+ 'in history',
+ historyId,
+ 'for project',
+ projectId
+ )
+ return
+ }
+ logger.debug({ blob, historyId }, 'backing up blob')
+ const blobPath = await downloadWithRetry(historyId, blob, tmpDir)
+ await backupWithRetry(historyId, blob, blobPath, persistor)
+}
+
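+/**
+ * Download and back up the given blobs via the concurrency limiter, using a
+ * temporary directory that is always cleaned up. Rejects if any blob fails.
+ */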
+async function backupBlobs(projectId, historyId, blobs, limiter, persistor) {
+ let tmpDir
+ try {
+ tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'blob-backup-'))
+
+ const blobBackupOperations = blobs.map(blob =>
+ limiter(backupSingleBlob, projectId, historyId, blob, tmpDir, persistor)
+ )
+
+ // Reject if any blob backup fails
+ await Promise.all(blobBackupOperations)
+ } finally {
+ if (tmpDir) {
+ await fs.rm(tmpDir, { recursive: true, force: true })
+ }
+ }
+}
+
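+/**
+ * Upload the original compressed chunk to the backup chunks bucket, keyed by
+ * history id and start version, with an md5 checksum for integrity.
+ */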
+async function backupChunk(
+ projectId,
+ historyId,
+ chunkBackupPersistorForProject,
+ chunkToBackup,
+ chunkRecord,
+ chunkBuffer
+) {
+ if (DRY_RUN) {
+ console.log(
+ 'Would back up chunk',
+ JSON.stringify(chunkRecord),
+ 'in history',
+ historyId,
+ 'for project',
+ projectId,
+ 'key',
+ makeChunkKey(historyId, chunkToBackup.startVersion)
+ )
+ return
+ }
+ const key = makeChunkKey(historyId, chunkToBackup.startVersion)
+ logger.debug({ chunkRecord, historyId, projectId, key }, 'backing up chunk')
+ const md5 = Crypto.createHash('md5').update(chunkBuffer)
+ await chunkBackupPersistorForProject.sendStream(
+ chunksBucket,
+ makeChunkKey(historyId, chunkToBackup.startVersion),
+ Stream.Readable.from([chunkBuffer]),
+ {
+ contentType: 'application/json',
+ contentEncoding: 'gzip',
+ contentLength: chunkBuffer.byteLength,
+ sourceMd5: md5.digest('hex'),
+ }
+ )
+}
+
+async function updateBackupStatus(
+ projectId,
+ lastBackedUpVersion,
+ chunkRecord,
+ startOfBackupTime
+) {
+ if (DRY_RUN) {
+ console.log(
+ 'Would set backup version to',
+ chunkRecord.endVersion,
+ 'with lastBackedUpTimestamp',
+ startOfBackupTime
+ )
+ return
+ }
+ logger.debug(
+ { projectId, chunkRecord, startOfBackupTime },
+ 'setting backupVersion and lastBackedUpTimestamp'
+ )
+ await setBackupVersion(
+ projectId,
+ lastBackedUpVersion,
+ chunkRecord.endVersion,
+ startOfBackupTime
+ )
+}
+
+// Define command-line options
+const optionDefinitions = [
+ {
+ name: 'projectId',
+ alias: 'p',
+ type: String,
+ description: 'The ID of the project to backup',
+ defaultOption: true,
+ },
+ {
+ name: 'help',
+ alias: 'h',
+ type: Boolean,
+ description: 'Display this usage guide.',
+ },
+ {
+ name: 'status',
+ alias: 's',
+ type: Boolean,
+ description: 'Display project status.',
+ },
+ {
+ name: 'list',
+ alias: 'l',
+ type: Boolean,
+ description: 'List projects that need to be backed up',
+ },
+ {
+ name: 'dry-run',
+ alias: 'n',
+ type: Boolean,
+ description: 'Perform a dry run without making any changes.',
+ },
+ {
+ name: 'retries',
+ alias: 'r',
+ type: Number,
+ description: 'Number of retries, default is 3.',
+ },
+ {
+ name: 'concurrency',
+ alias: 'c',
+ type: Number,
+ description: 'Number of concurrent blob downloads (default: 1)',
+ },
+ {
+ name: 'batch-concurrency',
+ alias: 'b',
+ type: Number,
+ description: 'Number of concurrent project operations (default: 1)',
+ },
+ {
+ name: 'pending',
+ alias: 'P',
+ type: Boolean,
+ description: 'Backup all pending projects.',
+ },
+ {
+ name: 'interval',
+ alias: 'i',
+ type: Number,
+ description: 'Time interval in seconds for pending backups (default: 3600)',
+ defaultValue: 3600,
+ },
+ {
+ name: 'fix',
+ type: Number,
+    description: 'Fix projects without chunks (value = max number of projects to check)',
+ },
+ {
+ name: 'init',
+ alias: 'I',
+ type: Boolean,
+ description: 'Initialize backups for all projects.',
+ },
+ { name: 'output', alias: 'o', type: String, description: 'Output file' },
+ {
+ name: 'start-date',
+ type: String,
+ description: 'Start date for initialization (ISO format)',
+ },
+ {
+ name: 'end-date',
+ type: String,
+ description: 'End date for initialization (ISO format)',
+ },
+ {
+ name: 'use-secondary',
+ type: Boolean,
+ description: 'Use secondary read preference for backup status',
+ },
+ {
+ name: 'compare',
+ alias: 'C',
+ type: Boolean,
+ description:
+ 'Compare backup with original chunks. With --start-date and --end-date compares all projects in range.',
+ },
+]
+
+function handleOptions() {
+ const options = commandLineArgs(optionDefinitions)
+
+ if (options.help) {
+ console.log('Usage:')
+    optionDefinitions.forEach(option => {
+      const alias = option.alias ? `, -${option.alias}` : ''
+      console.log(`  --${option.name}${alias}: ${option.description}`)
+    })
+ process.exit(0)
+ }
+
+ const projectIdRequired =
+ !options.list &&
+ !options.pending &&
+ !options.init &&
+ !(options.fix >= 0) &&
+ !(options.compare && options['start-date'] && options['end-date'])
+
+ if (projectIdRequired && !options.projectId) {
+ console.error('Error: projectId is required')
+ process.exit(1)
+ }
+
+ if (options.pending && options.projectId) {
+ console.error('Error: --pending cannot be specified with projectId')
+ process.exit(1)
+ }
+
+ if (options.pending && (options.list || options.status)) {
+ console.error('Error: --pending is exclusive with --list and --status')
+ process.exit(1)
+ }
+
+ if (options.init && options.pending) {
+ console.error('Error: --init cannot be specified with --pending')
+ process.exit(1)
+ }
+
+ if (
+ (options['start-date'] || options['end-date']) &&
+ !options.init &&
+ !options.compare
+ ) {
+ console.error(
+ 'Error: date options can only be used with --init or --compare'
+ )
+ process.exit(1)
+ }
+
+ if (options['use-secondary']) {
+ USE_SECONDARY = true
+ }
+
+ if (
+ options.compare &&
+ !options.projectId &&
+ !(options['start-date'] && options['end-date'])
+ ) {
+ console.error(
+ 'Error: --compare requires either projectId or both --start-date and --end-date'
+ )
+ process.exit(1)
+ }
+
+ DRY_RUN = options['dry-run'] || false
+ RETRY_LIMIT = options.retries || 3
+ CONCURRENCY = options.concurrency || 1
+ BATCH_CONCURRENCY = options['batch-concurrency'] || 1
+ BLOB_LIMITER = pLimit(CONCURRENCY)
+ return options
+}
+
+async function displayBackupStatus(projectId) {
+ const result = await analyseBackupStatus(projectId)
+ console.log('Backup status:', JSON.stringify(result))
+}
+
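+/**
+ * Load the project's backup status and the latest chunk metadata, warn when
+ * they disagree, and return a summary used to decide whether to back up.
+ */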
+async function analyseBackupStatus(projectId) {
+ const { backupStatus, historyId, currentEndVersion, currentEndTimestamp } =
+ await getBackupStatus(projectId)
+ // TODO: when we have confidence that the latestChunkMetadata always matches
+ // the values from the backupStatus we can skip loading it here
+ const latestChunkMetadata = await getLatestChunkMetadata(historyId, {
+ readOnly: Boolean(USE_SECONDARY),
+ })
+ if (
+ currentEndVersion &&
+ currentEndVersion !== latestChunkMetadata.endVersion
+ ) {
+    // Compare the current end version with the latest chunk metadata to check
+    // that the updates to the project collection are reliable. Some mismatches
+    // are expected due to the time window between getBackupStatus and
+    // getLatestChunkMetadata while the project is being actively edited.
+ logger.warn(
+ {
+ projectId,
+ historyId,
+ currentEndVersion,
+ currentEndTimestamp,
+ latestChunkMetadata,
+ },
+ 'currentEndVersion does not match latest chunk metadata'
+ )
+ }
+
+ if (DRY_RUN) {
+ console.log('Project:', projectId)
+ console.log('History ID:', historyId)
+ console.log('Latest Chunk Metadata:', JSON.stringify(latestChunkMetadata))
+ console.log('Current end version:', currentEndVersion)
+ console.log('Current end timestamp:', currentEndTimestamp)
+ console.log('Backup status:', backupStatus ?? 'none')
+ }
+ if (!backupStatus) {
+ if (DRY_RUN) {
+ console.log('No backup status found - doing full backup')
+ }
+ }
+ const lastBackedUpVersion = backupStatus?.lastBackedUpVersion
+ const endVersion = latestChunkMetadata.endVersion
+ if (endVersion >= 0 && endVersion === lastBackedUpVersion) {
+ if (DRY_RUN) {
+ console.log(
+ 'Project is up to date, last backed up at version',
+ lastBackedUpVersion
+ )
+ }
+ } else if (endVersion < lastBackedUpVersion) {
+ throw new Error('backup is ahead of project')
+ } else {
+ if (DRY_RUN) {
+ console.log(
+ 'Project needs to be backed up from',
+ lastBackedUpVersion,
+ 'to',
+ endVersion
+ )
+ }
+ }
+
+ return {
+ historyId,
+ lastBackedUpVersion,
+ currentVersion: latestChunkMetadata.endVersion || 0,
+ upToDate: endVersion >= 0 && lastBackedUpVersion === endVersion,
+ pendingChangeAt: backupStatus?.pendingChangeAt,
+ currentEndVersion,
+ currentEndTimestamp,
+ latestChunkMetadata,
+ }
+}
+
+async function displayPendingBackups(options) {
+ const intervalMs = options.interval * 1000
+ for await (const project of listPendingBackups(intervalMs)) {
+ console.log(
+ 'Project:',
+ project._id.toHexString(),
+ 'backup status:',
+ JSON.stringify(project.overleaf.backup),
+ 'history status:',
+ JSON.stringify(project.overleaf.history, [
+ 'currentEndVersion',
+ 'currentEndTimestamp',
+ ])
+ )
+ }
+}
+
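+// Chunk backups are keyed by the formatted project/history key joined with
+// the zero-padded start version of the chunk.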
+function makeChunkKey(projectId, startVersion) {
+ return path.join(projectKey.format(projectId), projectKey.pad(startVersion))
+}
+
+export async function backupProject(projectId, options) {
+ if (gracefulShutdownInitiated) {
+ return
+ }
+ await ensureGlobalBlobsLoaded()
+ // FIXME: flush the project first!
+  // Let's assume the flush happens externally and triggers this backup
+ const backupStartTime = new Date()
+ // find the last backed up version
+ const {
+ historyId,
+ lastBackedUpVersion,
+ currentVersion,
+ upToDate,
+ pendingChangeAt,
+ currentEndVersion,
+ latestChunkMetadata,
+ } = await analyseBackupStatus(projectId)
+
+ if (upToDate) {
+ logger.debug(
+ {
+ projectId,
+ historyId,
+ lastBackedUpVersion,
+ currentVersion,
+ pendingChangeAt,
+ },
+ 'backup is up to date'
+ )
+
+ if (
+ currentEndVersion === undefined &&
+ latestChunkMetadata.endVersion >= 0
+ ) {
+ if (DRY_RUN) {
+ console.log('Would update current metadata to', latestChunkMetadata)
+ } else {
+ await updateCurrentMetadataIfNotSet(projectId, latestChunkMetadata)
+ }
+ }
+
+ // clear the pending changes timestamp if the backup is complete
+ if (pendingChangeAt) {
+ if (DRY_RUN) {
+ console.log(
+ 'Would update or clear pending changes timestamp',
+ backupStartTime
+ )
+ } else {
+ await updatePendingChangeTimestamp(projectId, backupStartTime)
+ }
+ }
+ return
+ }
+
+ logger.debug(
+ {
+ projectId,
+ historyId,
+ lastBackedUpVersion,
+ currentVersion,
+ pendingChangeAt,
+ },
+ 'backing up project'
+ )
+
+ // this persistor works for both the chunks and blobs buckets,
+ // because they use the same DEK
+ const backupPersistorForProject = await backupPersistor.forProject(
+ chunksBucket,
+ makeProjectKey(historyId, '')
+ )
+
+ let previousBackedUpVersion = lastBackedUpVersion
+ const backupVersions = [previousBackedUpVersion]
+
+ for await (const {
+ blobsToBackup,
+ chunkToBackup,
+ chunkRecord,
+ chunkBuffer,
+ } of backupGenerator(historyId, lastBackedUpVersion)) {
+    // Back up the blobs first. This can be done in parallel, but must fail if
+    // any blob cannot be backed up; blobs that already exist in the backup are allowed.
+ const newBlobs = await findNewBlobs(projectId, blobsToBackup)
+
+ await backupBlobs(
+ projectId,
+ historyId,
+ newBlobs,
+ BLOB_LIMITER,
+ backupPersistorForProject
+ )
+
+    // then back up the original compressed chunk using the startVersion as the key
+ await backupChunk(
+ projectId,
+ historyId,
+ backupPersistorForProject,
+ chunkToBackup,
+ chunkRecord,
+ chunkBuffer
+ )
+
+ // persist the backup status in mongo for the current chunk
+ try {
+ await updateBackupStatus(
+ projectId,
+ previousBackedUpVersion,
+ chunkRecord,
+ backupStartTime
+ )
+ } catch (err) {
+ logger.error(
+ { projectId, chunkRecord, err, backupVersions },
+ 'error updating backup status'
+ )
+ throw err
+ }
+
+ previousBackedUpVersion = chunkRecord.endVersion
+ backupVersions.push(previousBackedUpVersion)
+
+ await cleanBackedUpBlobs(projectId, blobsToBackup)
+ }
+
+ // update the current end version and timestamp if they are not set
+ if (currentEndVersion === undefined && latestChunkMetadata.endVersion >= 0) {
+ if (DRY_RUN) {
+ console.log('Would update current metadata to', latestChunkMetadata)
+ } else {
+ await updateCurrentMetadataIfNotSet(projectId, latestChunkMetadata)
+ }
+ }
+
+ // clear the pending changes timestamp if the backup is complete, otherwise set it to the time
+ // when the backup started (to pick up the new changes on the next backup)
+ if (DRY_RUN) {
+ console.log(
+ 'Would update or clear pending changes timestamp',
+ backupStartTime
+ )
+ } else {
+ await updatePendingChangeTimestamp(projectId, backupStartTime)
+ }
+}
+
+function convertToISODate(dateStr) {
+ // Expecting YYYY-MM-DD format
+ if (!/^\d{4}-\d{2}-\d{2}$/.test(dateStr)) {
+ throw new Error('Date must be in YYYY-MM-DD format')
+ }
+ return new Date(dateStr + 'T00:00:00.000Z').toISOString()
+}
+
+export async function fixProjectsWithoutChunks(options) {
+ const limit = options.fix || 1
+ const query = {
+ 'overleaf.history.id': { $exists: true },
+ 'overleaf.backup.lastBackedUpVersion': { $in: [null] },
+ }
+ const cursor = client
+ .db()
+ .collection('projects')
+ .find(query, {
+ projection: { _id: 1, 'overleaf.history.id': 1 },
+ readPreference: READ_PREFERENCE_SECONDARY,
+ })
+ .limit(limit)
+ for await (const project of cursor) {
+ const historyId = project.overleaf.history.id.toString()
+ const chunks = await getProjectChunks(historyId)
+ if (chunks.length > 0) {
+ continue
+ }
+ if (DRY_RUN) {
+ console.log(
+ 'Would create new chunk for Project ID:',
+ project._id.toHexString(),
+ 'History ID:',
+ historyId,
+ 'Chunks:',
+ chunks
+ )
+ } else {
+ console.log(
+ 'Creating new chunk for Project ID:',
+ project._id.toHexString(),
+ 'History ID:',
+ historyId,
+ 'Chunks:',
+ chunks
+ )
+ const snapshot = new Snapshot()
+ const history = new History(snapshot, [])
+ const chunk = new Chunk(history, 0)
+ await create(historyId, chunk)
+ const newChunks = await getProjectChunks(historyId)
+ console.log('New chunk:', newChunks)
+ }
+ }
+}
+
+export async function initializeProjects(options) {
+ await ensureGlobalBlobsLoaded()
+ let totalErrors = 0
+ let totalProjects = 0
+
+ const query = {
+ 'overleaf.backup.lastBackedUpVersion': { $in: [null] },
+ }
+
+ if (options['start-date'] && options['end-date']) {
+ query._id = {
+ $gte: objectIdFromInput(convertToISODate(options['start-date'])),
+ $lt: objectIdFromInput(convertToISODate(options['end-date'])),
+ }
+ }
+
+ const cursor = client
+ .db()
+ .collection('projects')
+ .find(query, {
+ projection: { _id: 1 },
+ readPreference: READ_PREFERENCE_SECONDARY,
+ })
+
+ if (options.output) {
+ console.log("Writing project IDs to file: '" + options.output + "'")
+ const output = createWriteStream(options.output)
+ for await (const project of cursor) {
+ output.write(project._id.toHexString() + '\n')
+ totalProjects++
+ }
+ output.end()
+ console.log('Wrote ' + totalProjects + ' project IDs to file')
+ return
+ }
+
+ for await (const project of cursor) {
+ if (gracefulShutdownInitiated) {
+ console.warn('graceful shutdown: stopping project initialization')
+ break
+ }
+ totalProjects++
+ const projectId = project._id.toHexString()
+ try {
+ await backupProject(projectId, options)
+ } catch (err) {
+ totalErrors++
+ logger.error({ projectId, err }, 'error backing up project')
+ }
+ }
+
+ return { errors: totalErrors, projects: totalProjects }
+}
+
+async function backupPendingProjects(options) {
+ const intervalMs = options.interval * 1000
+ for await (const project of listPendingBackups(intervalMs)) {
+ if (gracefulShutdownInitiated) {
+ console.warn('graceful shutdown: stopping pending project backups')
+ break
+ }
+ const projectId = project._id.toHexString()
+ console.log(`Backing up pending project with ID: ${projectId}`)
+ await backupProject(projectId, options)
+ }
+}
+
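+/**
+ * Re-hashes blobs downloaded from the backup bucket and compares the result
+ * with the expected hash, caching computed hashes so repeated blobs are only
+ * fetched once per comparison run.
+ */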
+class BlobComparator {
+ constructor(backupPersistorForProject) {
+ this.cache = new Map()
+ this.backupPersistorForProject = backupPersistorForProject
+ }
+
+ async compareBlob(historyId, blob) {
+ let computedHash = this.cache.get(blob.hash)
+ const fromCache = !!computedHash
+
+ if (!computedHash) {
+ const blobKey = makeProjectKey(historyId, blob.hash)
+ const backupBlobStream =
+ await this.backupPersistorForProject.getObjectStream(
+ projectBlobsBucket,
+ blobKey,
+ { autoGunzip: true }
+ )
+ computedHash = await blobHashFromStream(blob.byteLength, backupBlobStream)
+ this.cache.set(blob.hash, computedHash)
+ }
+
+ const matches = computedHash === blob.hash
+ return {
+ matches,
+ computedHash,
+ fromCache,
+ }
+ }
+}
+
+async function compareBackups(projectId, options) {
+ console.log(`Comparing backups for project ${projectId}`)
+ const { historyId } = await getBackupStatus(projectId)
+ const chunks = await getProjectChunks(historyId)
+ const blobStore = new BlobStore(historyId)
+ const backupPersistorForProject = await backupPersistor.forProject(
+ chunksBucket,
+ makeProjectKey(historyId, '')
+ )
+
+ let totalChunkMatches = 0
+ let totalChunkMismatches = 0
+ let totalChunksNotFound = 0
+ let totalBlobMatches = 0
+ let totalBlobMismatches = 0
+ let totalBlobsNotFound = 0
+ const errors = []
+ const blobComparator = new BlobComparator(backupPersistorForProject)
+
+ for (const chunk of chunks) {
+ try {
+ // Compare chunk content
+ const originalChunk = await historyStore.loadRaw(historyId, chunk.id)
+ const key = makeChunkKey(historyId, chunk.startVersion)
+ try {
+ const backupChunkStream =
+ await backupPersistorForProject.getObjectStream(chunksBucket, key)
+ const backupStr = await text(backupChunkStream.pipe(createGunzip()))
+ const originalStr = JSON.stringify(originalChunk)
+ const backupChunk = JSON.parse(backupStr)
+ const backupStartVersion = chunk.startVersion
+ const backupEndVersion = chunk.startVersion + backupChunk.changes.length
+
+ if (originalStr === backupStr) {
+ console.log(
+ `✓ Chunk ${chunk.id} (v${chunk.startVersion}-v${chunk.endVersion}) matches`
+ )
+ totalChunkMatches++
+ } else if (originalStr === JSON.stringify(JSON.parse(backupStr))) {
+ console.log(
+ `✓ Chunk ${chunk.id} (v${chunk.startVersion}-v${chunk.endVersion}) matches (after normalisation)`
+ )
+ totalChunkMatches++
+ } else if (backupEndVersion < chunk.endVersion) {
+ console.log(
+ `✗ Chunk ${chunk.id} is ahead of backup (v${chunk.startVersion}-v${chunk.endVersion} vs v${backupStartVersion}-v${backupEndVersion})`
+ )
+ totalChunkMismatches++
+ errors.push({ chunkId: chunk.id, error: 'Chunk ahead of backup' })
+ } else {
+ console.log(
+ `✗ Chunk ${chunk.id} (v${chunk.startVersion}-v${chunk.endVersion}) MISMATCH`
+ )
+ totalChunkMismatches++
+ errors.push({ chunkId: chunk.id, error: 'Chunk mismatch' })
+ }
+ } catch (err) {
+ if (err instanceof NotFoundError) {
+ console.log(`✗ Chunk ${chunk.id} not found in backup`, err.cause)
+ totalChunksNotFound++
+ errors.push({ chunkId: chunk.id, error: `Chunk not found` })
+ } else {
+ throw err
+ }
+ }
+
+ const history = History.fromRaw(originalChunk)
+
+ // Compare blobs in chunk
+ const blobHashes = new Set()
+ history.findBlobHashes(blobHashes)
+ const blobs = await blobStore.getBlobs(Array.from(blobHashes))
+ for (const blob of blobs) {
+ if (GLOBAL_BLOBS.has(blob.hash)) {
+ const globalBlob = GLOBAL_BLOBS.get(blob.hash)
+ console.log(
+ ` ✓ Blob ${blob.hash} is a global blob`,
+ globalBlob?.demoted ? '(demoted)' : ''
+ )
+ continue
+ }
+ try {
+ const { matches, computedHash, fromCache } =
+ await blobComparator.compareBlob(historyId, blob)
+
+ if (matches) {
+ console.log(
+ ` ✓ Blob ${blob.hash} hash matches (${blob.byteLength} bytes)` +
+ (fromCache ? ' (from cache)' : '')
+ )
+ totalBlobMatches++
+ } else {
+ console.log(
+ ` ✗ Blob ${blob.hash} hash mismatch (original: ${blob.hash}, backup: ${computedHash}) (${blob.byteLength} bytes, ${blob.stringLength} string length)` +
+ (fromCache ? ' (from cache)' : '')
+ )
+ totalBlobMismatches++
+ errors.push({
+ chunkId: chunk.id,
+ error: `Blob ${blob.hash} hash mismatch`,
+ })
+ }
+ } catch (err) {
+ if (err instanceof NotFoundError) {
+ console.log(` ✗ Blob ${blob.hash} not found in backup`, err.cause)
+ totalBlobsNotFound++
+ errors.push({
+ chunkId: chunk.id,
+ error: `Blob ${blob.hash} not found`,
+ })
+ } else {
+ throw err
+ }
+ }
+ }
+ } catch (err) {
+ console.error(`Error comparing chunk ${chunk.id}:`, err)
+ errors.push({ chunkId: chunk.id, error: err })
+ }
+ }
+
+ // Print summary
+ console.log('\nComparison Summary:')
+ console.log('==================')
+ console.log(`Total chunks: ${chunks.length}`)
+ console.log(`Chunk matches: ${totalChunkMatches}`)
+ console.log(`Chunk mismatches: ${totalChunkMismatches}`)
+ console.log(`Chunk not found: ${totalChunksNotFound}`)
+ console.log(`Blob matches: ${totalBlobMatches}`)
+ console.log(`Blob mismatches: ${totalBlobMismatches}`)
+ console.log(`Blob not found: ${totalBlobsNotFound}`)
+ console.log(`Errors: ${errors.length}`)
+
+ if (errors.length > 0) {
+ console.log('\nErrors:')
+ errors.forEach(({ chunkId, error }) => {
+ console.log(` Chunk ${chunkId}: ${error}`)
+ })
+ throw new Error('Backup comparison FAILED')
+ } else {
+ console.log('Backup comparison successful')
+ }
+}
+
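+// Compare backups for every project with a lastBackedUpVersion in the given
+// date range, processing batches with limited concurrency.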
+async function compareAllProjects(options) {
+ const limiter = pLimit(BATCH_CONCURRENCY)
+ let totalErrors = 0
+ let totalProjects = 0
+
+ async function processBatch(batch) {
+ if (gracefulShutdownInitiated) {
+ throw new Error('graceful shutdown')
+ }
+ const batchOperations = batch.map(project =>
+ limiter(async () => {
+ const projectId = project._id.toHexString()
+ totalProjects++
+ try {
+ console.log(`\nComparing project ${projectId} (${totalProjects})`)
+ await compareBackups(projectId, options)
+ } catch (err) {
+ totalErrors++
+ console.error(`Failed to compare project ${projectId}:`, err)
+ }
+ })
+ )
+ await Promise.allSettled(batchOperations)
+ }
+
+ const query = {
+ 'overleaf.history.id': { $exists: true },
+ 'overleaf.backup.lastBackedUpVersion': { $exists: true },
+ }
+
+ await batchedUpdate(
+ client.db().collection('projects'),
+ query,
+ processBatch,
+ {
+ _id: 1,
+ 'overleaf.history': 1,
+ 'overleaf.backup': 1,
+ },
+ { readPreference: 'secondary' },
+ {
+ BATCH_RANGE_START: convertToISODate(options['start-date']),
+ BATCH_RANGE_END: convertToISODate(options['end-date']),
+ }
+ )
+
+ console.log('\nComparison Summary:')
+ console.log('==================')
+ console.log(`Total projects processed: ${totalProjects}`)
+ console.log(`Projects with errors: ${totalErrors}`)
+
+ if (totalErrors > 0) {
+ throw new Error('Some project comparisons failed')
+ }
+}
+
+async function main() {
+ const options = handleOptions()
+ await ensureGlobalBlobsLoaded()
+ const projectId = options.projectId
+ if (options.status) {
+ await displayBackupStatus(projectId)
+ } else if (options.list) {
+ await displayPendingBackups(options)
+ } else if (options.fix !== undefined) {
+ await fixProjectsWithoutChunks(options)
+ } else if (options.pending) {
+ await backupPendingProjects(options)
+ } else if (options.init) {
+ await initializeProjects(options)
+ } else if (options.compare) {
+ if (options['start-date'] && options['end-date']) {
+ await compareAllProjects(options)
+ } else {
+ await compareBackups(projectId, options)
+ }
+ } else {
+ await backupProject(projectId, options)
+ }
+}
+
+// Only run command-line interface when script is run directly
+if (import.meta.url === `file://${process.argv[1]}`) {
+ main()
+ .then(() => {
+ console.log(
+ gracefulShutdownInitiated ? 'Exited - graceful shutdown' : 'Completed'
+ )
+ })
+ .catch(err => {
+ console.error('Error backing up project:', err)
+ process.exit(1)
+ })
+ .finally(() => {
+ knex
+ .destroy()
+ .then(() => {
+ console.log('Postgres connection closed')
+ })
+ .catch(err => {
+ console.error('Error closing Postgres connection:', err)
+ })
+ client
+ .close()
+ .then(() => {
+ console.log('MongoDB connection closed')
+ })
+ .catch(err => {
+ console.error('Error closing MongoDB connection:', err)
+ })
+ redis
+ .disconnect()
+ .then(() => {
+ console.log('Redis connection closed')
+ })
+ .catch(err => {
+ console.error('Error closing Redis connection:', err)
+ })
+ })
+}
diff --git a/services/history-v1/storage/scripts/backup_blob.mjs b/services/history-v1/storage/scripts/backup_blob.mjs
new file mode 100644
index 0000000000..314b05313e
--- /dev/null
+++ b/services/history-v1/storage/scripts/backup_blob.mjs
@@ -0,0 +1,173 @@
+// @ts-check
+import commandLineArgs from 'command-line-args'
+import { backupBlob, downloadBlobToDir } from '../lib/backupBlob.mjs'
+import withTmpDir from '../../api/controllers/with_tmp_dir.js'
+import {
+ BlobStore,
+ GLOBAL_BLOBS,
+ loadGlobalBlobs,
+} from '../lib/blob_store/index.js'
+import assert from '../lib/assert.js'
+import knex from '../lib/knex.js'
+import { client } from '../lib/mongodb.js'
+import redis from '../lib/redis.js'
+import { setTimeout } from 'node:timers/promises'
+import fs from 'node:fs'
+
+await loadGlobalBlobs()
+
+/**
+ * Gracefully shut down the process
+ * @return {Promise<void>}
+ */
+async function gracefulShutdown() {
+ console.log('Gracefully shutting down')
+ await knex.destroy()
+ await client.close()
+ await redis.disconnect()
+ await setTimeout(100)
+ process.exit()
+}
+
+/**
+ *
+ * @param {string} row
+ * @return {BackupBlobJob}
+ */
+function parseCSVRow(row) {
+ const [historyId, hash] = row.split(',')
+ validateBackedUpBlobJob({ historyId, hash })
+ return { historyId, hash }
+}
+
+/**
+ *
+ * @param {BackupBlobJob} job
+ */
+function validateBackedUpBlobJob(job) {
+ assert.projectId(job.historyId)
+ assert.blobHash(job.hash)
+}
+
+/**
+ *
+ * @param {string} path
+ * @return {Promise<Array<BackupBlobJob>>}
+ */
+async function readCSV(path) {
+ let fh
+  /** @type {Array<BackupBlobJob>} */
+ const rows = []
+ try {
+ fh = await fs.promises.open(path, 'r')
+ } catch (error) {
+ console.error(`Could not open file: ${error}`)
+ throw error
+ }
+ for await (const line of fh.readLines()) {
+ try {
+ const row = parseCSVRow(line)
+ if (GLOBAL_BLOBS.has(row.hash)) {
+ console.log(`Skipping global blob: ${line}`)
+ continue
+ }
+ rows.push(row)
+ } catch (error) {
+ console.error(error instanceof Error ? error.message : error)
+ console.log(`Skipping invalid row: ${line}`)
+ }
+ }
+ return rows
+}
+
+/**
+ * @typedef {Object} BackupBlobJob
+ * @property {string} hash
+ * @property {string} historyId
+ */
+
+/**
+ * @param {Object} options
+ * @property {string} [options.historyId]
+ * @property {string} [options.hash]
+ * @property {string} [options.input]
+ * @return {Promise<Array<BackupBlobJob>>}
+ */
+async function initialiseJobs({ historyId, hash, input }) {
+ if (input) {
+ return await readCSV(input)
+ }
+
+ if (!historyId) {
+ console.error('historyId is required')
+ process.exitCode = 1
+ await gracefulShutdown()
+ }
+
+ if (!hash) {
+ console.error('hash is required')
+ process.exitCode = 1
+ await gracefulShutdown()
+ }
+
+ validateBackedUpBlobJob({ historyId, hash })
+
+ if (GLOBAL_BLOBS.has(hash)) {
+ console.error(`Blob ${hash} is a global blob; not backing up`)
+ process.exitCode = 1
+ await gracefulShutdown()
+ }
+ return [{ hash, historyId }]
+}
+
+/**
+ *
+ * @param {string} historyId
+ * @param {string} hash
+ * @return {Promise<void>}
+ */
+export async function downloadAndBackupBlob(historyId, hash) {
+ const blobStore = new BlobStore(historyId)
+ const blob = await blobStore.getBlob(hash)
+ if (!blob) {
+ throw new Error(`Blob ${hash} could not be loaded`)
+ }
+ await withTmpDir(`blob-${hash}`, async tmpDir => {
+ const filePath = await downloadBlobToDir(historyId, blob, tmpDir)
+ console.log(`Downloaded blob ${hash} to ${filePath}`)
+ await backupBlob(historyId, blob, filePath)
+ console.log('Backed up blob')
+ })
+}
+
+let jobs
+
+const options = commandLineArgs([
+ { name: 'historyId', type: String },
+ { name: 'hash', type: String },
+ { name: 'input', type: String },
+])
+
+try {
+ jobs = await initialiseJobs(options)
+} catch (error) {
+ console.error(error)
+ await gracefulShutdown()
+}
+
+if (!Array.isArray(jobs)) {
+ // This is mostly to satisfy typescript
+ process.exitCode = 1
+ await gracefulShutdown()
+ process.exit(1)
+}
+
+for (const { historyId, hash } of jobs) {
+ try {
+ await downloadAndBackupBlob(historyId, hash)
+ } catch (error) {
+ console.error(error)
+ process.exitCode = 1
+ }
+}
+await gracefulShutdown()
diff --git a/services/history-v1/storage/scripts/backup_sample.mjs b/services/history-v1/storage/scripts/backup_sample.mjs
new file mode 100644
index 0000000000..35ee1e93f8
--- /dev/null
+++ b/services/history-v1/storage/scripts/backup_sample.mjs
@@ -0,0 +1,153 @@
+// @ts-check
+import { ObjectId } from 'mongodb'
+import { READ_PREFERENCE_SECONDARY } from '@overleaf/mongo-utils/batchedUpdate.js'
+import { db, client } from '../lib/mongodb.js'
+
+const projectsCollection = db.collection('projects')
+
+// Enable caching for ObjectId.toString()
+ObjectId.cacheHexString = true
+
+// Configuration
+const SAMPLE_SIZE_PER_ITERATION = process.argv[2]
+ ? parseInt(process.argv[2], 10)
+ : 10000
+const TARGET_ERROR_PERCENTAGE = process.argv[3]
+ ? parseFloat(process.argv[3])
+ : 5.0
+
+let gracefulShutdownInitiated = false
+
+process.on('SIGINT', handleSignal)
+process.on('SIGTERM', handleSignal)
+
+function handleSignal() {
+ gracefulShutdownInitiated = true
+ console.warn('graceful shutdown initiated')
+}
+
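+// Take a random sample of projects and count how many have a
+// lastBackedUpVersion set.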
+async function takeSample(sampleSize) {
+ const results = await projectsCollection
+ .aggregate(
+ [
+ { $sample: { size: sampleSize } },
+ {
+ $match: { 'overleaf.backup.lastBackedUpVersion': { $exists: true } },
+ },
+ {
+ $count: 'total',
+ },
+ ],
+ { readPreference: READ_PREFERENCE_SECONDARY }
+ )
+ .toArray()
+
+ const count = results[0]?.total || 0
+ return { totalSampled: sampleSize, backedUp: count }
+}
+
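+// Estimate the backed-up proportion with a 95% confidence interval, applying
+// a finite population correction; the backed-up count is floored at 1 so the
+// standard error is never zero.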
+function calculateStatistics(
+ cumulativeSampled,
+ cumulativeBackedUp,
+ totalPopulation
+) {
+ const proportion = Math.max(1, cumulativeBackedUp) / cumulativeSampled
+
+ // Standard error with finite population correction
+ const fpc = Math.sqrt(
+ (totalPopulation - cumulativeSampled) / (totalPopulation - 1)
+ )
+ const stdError =
+ Math.sqrt((proportion * (1 - proportion)) / cumulativeSampled) * fpc
+
+ // 95% confidence interval is approximately ±1.96 standard errors
+ const marginOfError = 1.96 * stdError
+
+ return {
+ proportion,
+ percentage: (proportion * 100).toFixed(2),
+ marginOfError,
+ errorPercentage: (marginOfError * 100).toFixed(2),
+ lowerBound: ((proportion - marginOfError) * 100).toFixed(2),
+ upperBound: ((proportion + marginOfError) * 100).toFixed(2),
+ sampleSize: cumulativeSampled,
+ populationSize: totalPopulation,
+ }
+}
+
+async function main() {
+ console.log('Date:', new Date().toISOString())
+ const totalCount = await projectsCollection.estimatedDocumentCount({
+ readPreference: READ_PREFERENCE_SECONDARY,
+ })
+ console.log(
+ `Total projects in collection (estimated): ${totalCount.toLocaleString()}`
+ )
+ console.log(`Target margin of error: ${TARGET_ERROR_PERCENTAGE}%`)
+
+ let cumulativeSampled = 0
+ let cumulativeBackedUp = 0
+ let currentError = Infinity
+ let iteration = 0
+
+ console.log('Iteration | Total Sampled | % Backed Up | Margin of Error')
+ console.log('----------|---------------|-------------|----------------')
+
+ while (currentError > TARGET_ERROR_PERCENTAGE) {
+ if (gracefulShutdownInitiated) {
+ console.log('Graceful shutdown initiated. Exiting sampling loop.')
+ break
+ }
+
+ iteration++
+ const { totalSampled, backedUp } = await takeSample(
+ SAMPLE_SIZE_PER_ITERATION
+ )
+ cumulativeSampled += totalSampled
+ cumulativeBackedUp += backedUp
+
+ const stats = calculateStatistics(
+ cumulativeSampled,
+ cumulativeBackedUp,
+ totalCount
+ )
+ currentError = parseFloat(stats.errorPercentage)
+
+ console.log(
+ `${iteration.toString().padStart(9)} | ` +
+ `${cumulativeSampled.toString().padStart(13)} | ` +
+ `${stats.percentage.padStart(10)}% | ` +
+ `\u00B1${stats.errorPercentage}%`
+ )
+
+ // Small delay between iterations
+ await new Promise(resolve => setTimeout(resolve, 100))
+ }
+
+ const finalStats = calculateStatistics(
+ cumulativeSampled,
+ cumulativeBackedUp,
+ totalCount
+ )
+
+ console.log(
+ `Projects sampled: ${cumulativeSampled.toLocaleString()} out of ${totalCount.toLocaleString()}`
+ )
+ console.log(
+ `Estimated percentage with lastBackedUpVersion: ${finalStats.percentage}%`
+ )
+ console.log(
+ `95% Confidence Interval: ${finalStats.lowerBound}% - ${finalStats.upperBound}%`
+ )
+ console.log(`Final Margin of Error: \u00B1${finalStats.errorPercentage}%`)
+}
+
+main()
+ .then(() => console.log('Done.'))
+ .catch(err => {
+ console.error('Error:', err)
+ process.exitCode = 1
+ })
+ .finally(() => {
+ client.close().catch(err => console.error('Error closing MongoDB:', err))
+ })
diff --git a/services/history-v1/storage/scripts/backup_scheduler.mjs b/services/history-v1/storage/scripts/backup_scheduler.mjs
new file mode 100644
index 0000000000..38b6e6ef04
--- /dev/null
+++ b/services/history-v1/storage/scripts/backup_scheduler.mjs
@@ -0,0 +1,429 @@
+import Queue from 'bull'
+import config from 'config'
+import commandLineArgs from 'command-line-args'
+import logger from '@overleaf/logger'
+import {
+ listPendingBackups,
+ listUninitializedBackups,
+ getBackupStatus,
+} from '../lib/backup_store/index.js'
+
+logger.initialize('backup-queue')
+
+// Use the same redis config as backup_worker
+const redisOptions = config.get('redis.queue')
+
+// Create a Bull queue named 'backup'
+const backupQueue = new Queue('backup', {
+ redis: redisOptions,
+ defaultJobOptions: {
+ removeOnComplete: { age: 60 }, // keep completed jobs for 60 seconds
+ removeOnFail: { age: 7 * 24 * 3600, count: 1000 }, // keep failed jobs for 7 days, max 1000
+ },
+})
+
+// Define command-line options
+const optionDefinitions = [
+ { name: 'clean', type: Boolean },
+ { name: 'status', type: Boolean },
+ {
+ name: 'add',
+ type: String,
+ multiple: true,
+ description: 'Project IDs or date range in YYYY-MM-DD:YYYY-MM-DD format',
+ },
+ { name: 'monitor', type: Boolean },
+ {
+ name: 'queue-pending',
+ type: Number,
+ description:
+ 'Find projects with pending changes older than N seconds and add them to the queue',
+ },
+ {
+ name: 'show-pending',
+ type: Number,
+ description:
+ 'Show count of pending projects older than N seconds without adding to queue',
+ },
+ {
+ name: 'limit',
+ type: Number,
+ description: 'Limit the number of jobs to be added',
+ },
+ {
+ name: 'interval',
+ type: Number,
+ description: 'Time in seconds to spread jobs over (default: 300)',
+ defaultValue: 300,
+ },
+ {
+ name: 'backoff-delay',
+ type: Number,
+ description:
+ 'Backoff delay in milliseconds for failed jobs (default: 1000)',
+ defaultValue: 1000,
+ },
+ {
+ name: 'attempts',
+ type: Number,
+ description: 'Number of retry attempts for failed jobs (default: 3)',
+ defaultValue: 3,
+ },
+ {
+ name: 'warn-threshold',
+ type: Number,
+    description: 'Warn about any project exceeding this pending age (in seconds)',
+ defaultValue: 2 * 3600, // 2 hours
+ },
+ {
+ name: 'verbose',
+ alias: 'v',
+ type: Boolean,
+ description: 'Show detailed information when used with --show-pending',
+ },
+]
+
+// Parse command line arguments
+const options = commandLineArgs(optionDefinitions)
+const WARN_THRESHOLD = options['warn-threshold']
+
+// Helper to validate date format
+function isValidDateFormat(dateStr) {
+ return /^\d{4}-\d{2}-\d{2}$/.test(dateStr)
+}
+
+// Helper to validate the pending time parameter
+function validatePendingTime(option, value) {
+ if (typeof value !== 'number' || value <= 0) {
+ console.error(
+ `Error: --${option} requires a positive numeric TIME argument in seconds`
+ )
+ console.error(`Example: --${option} 3600`)
+ process.exit(1)
+ }
+ return value
+}
+
+// Helper to format the pending time display
+function formatPendingTime(timestamp) {
+ const now = new Date()
+ const diffMs = now - timestamp
+ const seconds = Math.floor(diffMs / 1000)
+ return `${timestamp.toISOString()} (${seconds} seconds ago)`
+}
+
+// Helper to add a job to the queue, checking for duplicates
+async function addJobWithCheck(queue, data, options) {
+ const jobId = options.jobId
+
+ // Check if the job already exists
+ const existingJob = await queue.getJob(jobId)
+
+ if (existingJob) {
+ return { job: existingJob, added: false }
+ } else {
+ const job = await queue.add(data, options)
+ return { job, added: true }
+ }
+}
+
+// Setup queue event listeners
+function setupMonitoring() {
+ console.log('Starting queue monitoring. Press Ctrl+C to exit.')
+
+ backupQueue.on('global:error', error => {
+ logger.info({ error }, 'Queue error')
+ })
+
+ backupQueue.on('global:waiting', jobId => {
+ logger.info({ jobId }, 'job is waiting')
+ })
+
+ backupQueue.on('global:active', jobId => {
+ logger.info({ jobId }, 'job is now active')
+ })
+
+ backupQueue.on('global:stalled', jobId => {
+ logger.info({ jobId }, 'job has stalled')
+ })
+
+ backupQueue.on('global:progress', (jobId, progress) => {
+ logger.info({ jobId, progress }, 'job progress')
+ })
+
+ backupQueue.on('global:completed', (jobId, result) => {
+ logger.info({ jobId, result }, 'job completed')
+ })
+
+ backupQueue.on('global:failed', (jobId, err) => {
+ logger.info({ jobId, err }, 'job failed')
+ })
+
+ backupQueue.on('global:paused', () => {
+ logger.info({}, 'Queue paused')
+ })
+
+ backupQueue.on('global:resumed', () => {
+ logger.info({}, 'Queue resumed')
+ })
+
+ backupQueue.on('global:cleaned', (jobs, type) => {
+ logger.info({ jobsCount: jobs.length, type }, 'Jobs cleaned')
+ })
+
+ backupQueue.on('global:drained', () => {
+ logger.info({}, 'Queue drained')
+ })
+
+ backupQueue.on('global:removed', jobId => {
+ logger.info({ jobId }, 'Job removed')
+ })
+}
+
+async function addDateRangeJob(input) {
+ const [startDate, endDate] = input.split(':')
+ if (!isValidDateFormat(startDate) || !isValidDateFormat(endDate)) {
+ console.error(
+ `Invalid date format for "${input}". Use YYYY-MM-DD:YYYY-MM-DD`
+ )
+ return
+ }
+
+ const jobId = `backup-${startDate}-to-${endDate}`
+ const { job, added } = await addJobWithCheck(
+ backupQueue,
+ { startDate, endDate },
+ { jobId }
+ )
+
+ console.log(
+ `${added ? 'Added' : 'Already exists'}: date range backup job: ${startDate} to ${endDate}, job ID: ${job.id}`
+ )
+}
+
+// Helper to list pending and uninitialized backups
+// This function combines the two cursors into a single generator
+// to yield projects from both lists
+async function* pendingCursor(timeIntervalMs, limit) {
+ for await (const project of listPendingBackups(timeIntervalMs, limit)) {
+ yield project
+ }
+ for await (const project of listUninitializedBackups(timeIntervalMs, limit)) {
+ yield project
+ }
+}
+
+// Process pending projects with changes older than the specified seconds
+async function processPendingProjects(
+ age,
+ showOnly,
+ limit,
+ verbose,
+ jobInterval,
+ jobOpts = {}
+) {
+ const timeIntervalMs = age * 1000
+ console.log(
+ `Finding projects with pending changes older than ${age} seconds${showOnly ? ' (count only)' : ''}`
+ )
+
+ let count = 0
+ let addedCount = 0
+ let existingCount = 0
+ // Pass the limit directly to MongoDB query for better performance
+ const changeTimes = []
+ for await (const project of pendingCursor(timeIntervalMs, limit)) {
+ const projectId = project._id.toHexString()
+ const pendingAt =
+ project.overleaf?.backup?.pendingChangeAt || project._id.getTimestamp()
+ if (pendingAt) {
+ changeTimes.push(pendingAt)
+ const pendingAge = Math.floor((Date.now() - pendingAt.getTime()) / 1000)
+ if (pendingAge > WARN_THRESHOLD) {
+ try {
+ const backupStatus = await getBackupStatus(projectId)
+ logger.warn(
+ {
+ projectId,
+ pendingAt,
+ pendingAge,
+ backupStatus,
+ warnThreshold: WARN_THRESHOLD,
+ },
+ `pending change exceeds rpo warning threshold`
+ )
+ } catch (err) {
+ logger.error(
+ { projectId, pendingAt, pendingAge },
+ 'Error getting backup status'
+ )
+ throw err
+ }
+ }
+ }
+ if (showOnly && verbose) {
+ console.log(
+ `Project: ${projectId} (pending since: ${formatPendingTime(pendingAt)})`
+ )
+ } else if (!showOnly) {
+ const delay = Math.floor(Math.random() * jobInterval * 1000) // add random delay to avoid all jobs running simultaneously
+ const { job, added } = await addJobWithCheck(
+ backupQueue,
+ { projectId, pendingChangeAt: pendingAt.getTime() },
+ { ...jobOpts, delay, jobId: projectId }
+ )
+
+ if (added) {
+ if (verbose) {
+ console.log(
+ `Added job for project: ${projectId}, job ID: ${job.id} (pending since: ${formatPendingTime(pendingAt)})`
+ )
+ }
+ addedCount++
+ } else {
+ if (verbose) {
+ console.log(
+ `Job already exists for project: ${projectId}, job ID: ${job.id} (pending since: ${formatPendingTime(pendingAt)})`
+ )
+ }
+ existingCount++
+ }
+ }
+
+ count++
+ if (count % 1000 === 0) {
+ console.log(
+ `Processed ${count} projects`,
+ showOnly ? '' : `(${addedCount} added, ${existingCount} existing)`
+ )
+ }
+ }
+ // Set oldestChange to undefined if there are no changes
+ const oldestChange =
+ changeTimes.length > 0
+ ? changeTimes.reduce((min, time) => (time < min ? time : min))
+ : undefined
+
+ if (showOnly) {
+ console.log(
+ `Found ${count} projects with pending changes (not added to queue)`
+ )
+ } else {
+ console.log(`Found ${count} projects with pending changes:`)
+ console.log(` ${addedCount} jobs added to queue`)
+ console.log(` ${existingCount} jobs already existed in queue`)
+ if (oldestChange) {
+ console.log(` Oldest pending change: ${formatPendingTime(oldestChange)}`)
+ }
+ }
+}
+
+// Main execution block
+async function run() {
+ const optionCount = [
+ options.clean,
+ options.status,
+ options.add,
+ options.monitor,
+ options['queue-pending'] !== undefined,
+ options['show-pending'] !== undefined,
+ ].filter(Boolean).length
+ if (optionCount > 1) {
+ console.error('Only one option can be specified')
+ process.exit(1)
+ }
+
+ if (options.clean) {
+ const beforeCounts = await backupQueue.getJobCounts()
+ console.log('Current queue state:', JSON.stringify(beforeCounts))
+ console.log('Cleaning completed and failed jobs...')
+ await backupQueue.clean(1, 'completed')
+ await backupQueue.clean(1, 'failed')
+ const afterCounts = await backupQueue.getJobCounts()
+ console.log('Current queue state:', JSON.stringify(afterCounts))
+ console.log('Queue cleaned successfully')
+ } else if (options.status) {
+ const counts = await backupQueue.getJobCounts()
+ console.log('Current queue state:', JSON.stringify(counts))
+ } else if (options.add) {
+ const inputs = Array.isArray(options.add) ? options.add : [options.add]
+ for (const input of inputs) {
+ if (input.includes(':')) {
+ // Handle date range format
+ await addDateRangeJob(input)
+ } else {
+ // Handle project ID format
+ const { job, added } = await addJobWithCheck(
+ backupQueue,
+ { projectId: input },
+ { jobId: input }
+ )
+ console.log(
+ `${added ? 'Added' : 'Already exists'}: job for project: ${input}, job ID: ${job.id}`
+ )
+ }
+ }
+ } else if (options.monitor) {
+ setupMonitoring()
+ } else if (options['queue-pending'] !== undefined) {
+ const age = validatePendingTime('queue-pending', options['queue-pending'])
+ await processPendingProjects(
+ age,
+ false,
+ options.limit,
+ options.verbose,
+ options.interval,
+ {
+ attempts: options.attempts,
+ backoff: {
+ type: 'exponential',
+ delay: options['backoff-delay'],
+ },
+ }
+ )
+ } else if (options['show-pending'] !== undefined) {
+ const age = validatePendingTime('show-pending', options['show-pending'])
+ await processPendingProjects(age, true, options.limit, options.verbose)
+ } else {
+ console.log('Usage:')
+ console.log(' --clean Clean up completed and failed jobs')
+ console.log(' --status Show current job counts')
+ console.log(' --add [projectId] Add a job for the specified projectId')
+ console.log(
+ ' --add [YYYY-MM-DD:YYYY-MM-DD] Add a job for the specified date range'
+ )
+ console.log(' --monitor Monitor queue events')
+ console.log(
+ ' --queue-pending TIME Find projects with changes older than TIME seconds and add them to the queue'
+ )
+ console.log(
+ ' --show-pending TIME Show count of pending projects older than TIME seconds'
+ )
+ console.log(' --limit N Limit the number of jobs to be added')
+ console.log(
+ ' --interval TIME Time interval in seconds to spread jobs over'
+ )
+ console.log(
+ ' --backoff-delay TIME Backoff delay in milliseconds for failed jobs (default: 1000)'
+ )
+ console.log(
+ ' --attempts N Number of retry attempts for failed jobs (default: 3)'
+ )
+ console.log(
+ ' --verbose, -v Show detailed information when used with --show-pending'
+ )
+ }
+}
+
+// Run and handle errors
+run()
+ .catch(err => {
+ console.error('Error:', err)
+ process.exit(1)
+ })
+ .then(result => {
+ // Only exit if not in monitor mode
+ if (!options.monitor) {
+ process.exit(0)
+ }
+ })
diff --git a/services/history-v1/storage/scripts/backup_worker.mjs b/services/history-v1/storage/scripts/backup_worker.mjs
new file mode 100644
index 0000000000..1097bb04b9
--- /dev/null
+++ b/services/history-v1/storage/scripts/backup_worker.mjs
@@ -0,0 +1,144 @@
+import Queue from 'bull'
+import logger from '@overleaf/logger'
+import config from 'config'
+import metrics from '@overleaf/metrics'
+import {
+ backupProject,
+ initializeProjects,
+ configureBackup,
+} from './backup.mjs'
+
+const CONCURRENCY = 15
+const WARN_THRESHOLD = 2 * 60 * 60 * 1000 // warn if projects are older than this
+const redisOptions = config.get('redis.queue')
+const JOB_TIME_BUCKETS = [10, 100, 500, 1000, 5000, 10000, 30000, 60000] // milliseconds
+const LAG_TIME_BUCKETS_HRS = [
+ 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.75, 2, 3, 4, 5, 6,
+] // hours
+
+// Configure backup settings to match worker concurrency
+configureBackup({ concurrency: 50, useSecondary: true })
+
+// Create a Bull queue named 'backup'
+const backupQueue = new Queue('backup', {
+ redis: redisOptions,
+ settings: {
+ lockDuration: 15 * 60 * 1000, // 15 minutes
+ lockRenewTime: 60 * 1000, // 1 minute
+ maxStalledCount: 0, // mark stalled jobs as failed
+ },
+})
+
+// Log queue events
+backupQueue.on('active', job => {
+ logger.debug({ job }, 'job is now active')
+})
+
+backupQueue.on('completed', (job, result) => {
+ metrics.inc('backup_worker_job', 1, { status: 'completed' })
+ logger.debug({ job, result }, 'job completed')
+})
+
+backupQueue.on('failed', (job, err) => {
+ metrics.inc('backup_worker_job', 1, { status: 'failed' })
+ logger.error({ job, err }, 'job failed')
+})
+
+backupQueue.on('waiting', jobId => {
+ logger.debug({ jobId }, 'job is waiting')
+})
+
+backupQueue.on('error', error => {
+ logger.error({ error }, 'queue error')
+})
+
+backupQueue.on('stalled', job => {
+ logger.error({ job }, 'job has stalled')
+})
+
+backupQueue.on('lock-extension-failed', (job, err) => {
+ logger.error({ job, err }, 'lock extension failed')
+})
+
+backupQueue.on('paused', () => {
+ logger.info('queue paused')
+})
+
+backupQueue.on('resumed', () => {
+ logger.info('queue resumed')
+})
+
+// Process jobs
+backupQueue.process(CONCURRENCY, async job => {
+ const { projectId, startDate, endDate } = job.data
+
+ if (projectId) {
+ return await runBackup(projectId, job.data, job)
+ } else if (startDate && endDate) {
+ return await runInit(startDate, endDate)
+ } else {
+ throw new Error('invalid job data')
+ }
+})
+
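+// Back up a single project, recording job duration and replication lag
+// metrics; errors are re-thrown so Bull marks the job as failed.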
+async function runBackup(projectId, data, job) {
+ const { pendingChangeAt } = data
+ // record the time it takes to run the backup job
+ const timer = new metrics.Timer(
+ 'backup_worker_job_duration',
+ 1,
+ {},
+ JOB_TIME_BUCKETS
+ )
+ const pendingAge = Date.now() - pendingChangeAt
+ if (pendingAge > WARN_THRESHOLD) {
+ logger.warn(
+ { projectId, pendingAge, job },
+ 'project has been pending for a long time'
+ )
+ }
+ try {
+ logger.debug({ projectId }, 'processing backup for project')
+ await backupProject(projectId, {})
+ metrics.inc('backup_worker_project', 1, {
+ status: 'success',
+ })
+ timer.done()
+ // record the replication lag (time from change to backup)
+ if (pendingChangeAt) {
+ metrics.histogram(
+ 'backup_worker_replication_lag_in_hours',
+ (Date.now() - pendingChangeAt) / (3600 * 1000),
+ LAG_TIME_BUCKETS_HRS
+ )
+ }
+ return `backup completed ${projectId}`
+ } catch (err) {
+ metrics.inc('backup_worker_project', 1, { status: 'failed' })
+ logger.error({ projectId, err }, 'backup failed')
+ throw err // Re-throw to mark job as failed
+ }
+}
+
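+// Initialize backups for all projects created within the given date range.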
+async function runInit(startDate, endDate) {
+ try {
+ logger.info({ startDate, endDate }, 'initializing projects')
+ await initializeProjects({ 'start-date': startDate, 'end-date': endDate })
+ return `initialization completed ${startDate} - ${endDate}`
+ } catch (err) {
+ logger.error({ startDate, endDate, err }, 'initialization failed')
+ throw err
+ }
+}
+
+export async function drainQueue() {
+ logger.info({ queue: backupQueue.name }, 'pausing queue')
+ await backupQueue.pause(true) // pause this worker and wait for jobs to finish
+ logger.info({ queue: backupQueue.name }, 'closing queue')
+ await backupQueue.close()
+}
+
+export async function healthCheck() {
+ const count = await backupQueue.count()
+ metrics.gauge('backup_worker_queue_length', count)
+}
diff --git a/services/history-v1/storage/scripts/expire_redis_chunks.js b/services/history-v1/storage/scripts/expire_redis_chunks.js
new file mode 100644
index 0000000000..cb6d689e2c
--- /dev/null
+++ b/services/history-v1/storage/scripts/expire_redis_chunks.js
@@ -0,0 +1,74 @@
+const logger = require('@overleaf/logger')
+const commandLineArgs = require('command-line-args')
+const redis = require('../lib/redis')
+const { scanAndProcessDueItems } = require('../lib/scan')
+const { expireProject, claimExpireJob } = require('../lib/chunk_store/redis')
+const config = require('config')
+const { fetchNothing } = require('@overleaf/fetch-utils')
+
+const rclient = redis.rclientHistory
+
+const optionDefinitions = [
+ { name: 'dry-run', alias: 'd', type: Boolean },
+ { name: 'post-request', type: Boolean },
+]
+const options = commandLineArgs(optionDefinitions)
+const DRY_RUN = options['dry-run'] || false
+const POST_REQUEST = options['post-request'] || false
+const HISTORY_V1_URL = `http://${process.env.HISTORY_V1_HOST || 'localhost'}:${process.env.PORT || 3100}`
+
+logger.initialize('expire-redis-chunks')
+
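+// Claim the expire job for the project, then expire it either directly or via
+// an HTTP request to the history-v1 service when --post-request is set.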
+async function expireProjectAction(projectId) {
+ const job = await claimExpireJob(projectId)
+ if (POST_REQUEST) {
+ await requestProjectExpiry(projectId)
+ } else {
+ await expireProject(projectId)
+ }
+ if (job && job.close) {
+ await job.close()
+ }
+}
+
+async function requestProjectExpiry(projectId) {
+ logger.debug({ projectId }, 'sending project expire request')
+ const url = `${HISTORY_V1_URL}/api/projects/${projectId}/expire`
+ const credentials = Buffer.from(
+ `staging:${config.get('basicHttpAuth.password')}`
+ ).toString('base64')
+ await fetchNothing(url, {
+ method: 'POST',
+ headers: {
+ Authorization: `Basic ${credentials}`,
+ },
+ })
+}
+
+async function runExpireChunks() {
+ await scanAndProcessDueItems(
+ rclient,
+ 'expireChunks',
+ 'expire-time',
+ expireProjectAction,
+ DRY_RUN
+ )
+}
+
+if (require.main === module) {
+ runExpireChunks()
+ .catch(err => {
+ logger.fatal(
+ { err, taskName: 'expireChunks' },
+ 'Unhandled error in runExpireChunks'
+ )
+ process.exit(1)
+ })
+ .finally(async () => {
+ await redis.disconnect()
+ })
+} else {
+ module.exports = {
+ runExpireChunks,
+ }
+}
diff --git a/services/history-v1/storage/scripts/export_global_blobs.mjs b/services/history-v1/storage/scripts/export_global_blobs.mjs
new file mode 100644
index 0000000000..ccbb1237dd
--- /dev/null
+++ b/services/history-v1/storage/scripts/export_global_blobs.mjs
@@ -0,0 +1,69 @@
+/**
+ * A script to export the global blobs from mongo to a CSV file.
+ *
+ * node storage/scripts/export_global_blobs.mjs --output global_blobs.csv
+ *
+ * The output CSV has the following format:
+ *
+ * hash,path,byteLength,stringLength,demoted
+ *
+ * hash: the hash of the blob
+ * path: the path of the blob in the blob store
+ * byteLength: the byte length of the blob, or empty if unknown
+ * stringLength: the string length of the blob, or empty if unknown
+ * demoted: true if the blob has been demoted to a reference, false otherwise
+ */
+
+// @ts-check
+import { ObjectId } from 'mongodb'
+import { GLOBAL_BLOBS, loadGlobalBlobs } from '../lib/blob_store/index.js'
+import { client } from '../lib/mongodb.js'
+import commandLineArgs from 'command-line-args'
+import fs from 'node:fs'
+
+// Enable caching for ObjectId.toString()
+ObjectId.cacheHexString = true
+
+function parseArgs() {
+ const args = commandLineArgs([
+ {
+ name: 'output',
+ type: String,
+ alias: 'o',
+ },
+ ])
+ const OUTPUT_STREAM = fs.createWriteStream(args['output'], { flags: 'wx' })
+
+ return {
+ OUTPUT_STREAM,
+ }
+}
+
+const { OUTPUT_STREAM } = parseArgs()
+
+async function main() {
+ await loadGlobalBlobs()
+ OUTPUT_STREAM.write('hash,path,byteLength,stringLength,demoted\n')
+ for (const [hash, { blob, demoted }] of GLOBAL_BLOBS) {
+ const { hash: blobHash, byteLength, stringLength } = blob
+ if (blobHash !== hash) {
+ throw new Error(`hash mismatch: ${hash} !== ${blobHash}`)
+ }
+ const path = blobHash.slice(0, 2) + '/' + blobHash.slice(2)
+ const byteLengthStr = byteLength === null ? '' : byteLength
+ const stringLengthStr = stringLength === null ? '' : stringLength
+ OUTPUT_STREAM.write(
+ `${hash},${path},${byteLengthStr},${stringLengthStr},${demoted}\n`
+ )
+ }
+}
+
+main()
+ .then(() => console.log('Done.'))
+ .catch(err => {
+ console.error('Error:', err)
+ process.exitCode = 1
+ })
+ .finally(() => {
+ client.close().catch(err => console.error('Error closing MongoDB:', err))
+ })
diff --git a/services/history-v1/storage/scripts/fix_string_backedUpBlobs_ids.mjs b/services/history-v1/storage/scripts/fix_string_backedUpBlobs_ids.mjs
index adb5028643..007eebea77 100644
--- a/services/history-v1/storage/scripts/fix_string_backedUpBlobs_ids.mjs
+++ b/services/history-v1/storage/scripts/fix_string_backedUpBlobs_ids.mjs
@@ -20,7 +20,13 @@ async function processRecord(record) {
mongoId(record._id)
const newId = new ObjectId(record._id)
if (config.commit) {
- await backedUpBlobs.insertOne({ _id: newId, blobs: record.blobs })
+ await backedUpBlobs.updateOne(
+ { _id: newId },
+ {
+ $addToSet: { blobs: { $each: record.blobs } },
+ },
+ { upsert: true }
+ )
await backedUpBlobs.deleteOne({ _id: record._id })
}
STATS.replaced++
diff --git a/services/history-v1/storage/scripts/list_redis_buffer_stats.js b/services/history-v1/storage/scripts/list_redis_buffer_stats.js
new file mode 100644
index 0000000000..a53a939e44
--- /dev/null
+++ b/services/history-v1/storage/scripts/list_redis_buffer_stats.js
@@ -0,0 +1,145 @@
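+// Prints per-project Redis buffer statistics (snapshot size, change count,
+// change sizes and ages) as CSV to stdout.
+//
+// Usage:
+//   node storage/scripts/list_redis_buffer_stats.js
+//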
+const { rclientHistory, disconnect } = require('../lib/redis')
+const { scanRedisCluster } = require('../lib/scan')
+
+// Lua script to get snapshot length, change lengths, and change timestamps
+// Assumes snapshot key is a string and changes key is a list.
+const LUA_SCRIPT = `
+ -- local cjson = require('cjson')
+ local snapshotKey = KEYS[1]
+ local changesKey = KEYS[2]
+
+ -- Get snapshot length (returns 0 if key does not exist)
+ local snapshotLen = redis.call('STRLEN', snapshotKey)
+
+ -- Return nil if snapshot is empty
+ if snapshotLen == 0 then
+ return nil
+ end
+
+ local changeLengths = {}
+ local changeTimestamps = {}
+
+ -- Get all changes (returns empty list if key does not exist)
+ local changes = redis.call('LRANGE', changesKey, 0, -1)
+
+ -- FIXME: it would be better to send all the changes back and do the processing
+ -- in JS to avoid blocking redis, if we need to run this script regularly
+ for i, change in ipairs(changes) do
+ -- Calculate length
+ table.insert(changeLengths, string.len(change))
+
+ -- Attempt to decode JSON and extract timestamp
+ local ok, decoded = pcall(cjson.decode, change)
+ if ok and type(decoded) == 'table' and decoded.timestamp then
+ table.insert(changeTimestamps, decoded.timestamp)
+ else
+ -- Decoding failed or the timestamp is missing: insert an empty string so
+ -- the timestamps list stays aligned with changeLengths (inserting nil is a
+ -- no-op in Lua and would break the alignment)
+ table.insert(changeTimestamps, '')
+ end
+ end
+
+ -- Return snapshot length, list of change lengths, and list of change timestamps
+ return {snapshotLen, changeLengths, changeTimestamps}
+`
+
+// Define the command if it doesn't exist
+if (!rclientHistory.getProjectBufferStats) {
+ rclientHistory.defineCommand('getProjectBufferStats', {
+ numberOfKeys: 2,
+ lua: LUA_SCRIPT,
+ })
+}
+
+/**
+ * Processes a single project ID: fetches its buffer stats from Redis
+ * and writes the results to the output stream in CSV format.
+ *
+ * @param {string} projectId The project ID to process.
+ * @param {NodeJS.WritableStream} outputStream The stream to write CSV output to.
+ */
+async function processProject(projectId, outputStream) {
+ try {
+ // Get current time in milliseconds *before* fetching data
+ const nowMs = Date.now()
+
+ // Execute the Lua script
+ const result = await rclientHistory.getProjectBufferStats(
+ `snapshot:${projectId}`,
+ `changes:${projectId}`
+ )
+
+ // Check if the result is null (e.g., snapshot is empty)
+ if (result === null) {
+ // write to stderr so this notice does not corrupt the CSV on stdout
+ console.warn(
+ `Skipping project ${projectId}: Snapshot is empty or does not exist.`
+ )
+ return
+ }
+
+ const [snapshotSize, changeSizes, changeTimestamps] = result
+
+ // Output snapshot size
+ outputStream.write(`${projectId},snapshotSize,${snapshotSize}\n`)
+ outputStream.write(`${projectId},changeCount,${changeSizes.length}\n`)
+
+ const changes = changeSizes.map((size, index) => [
+ size,
+ changeTimestamps[index],
+ ])
+
+ let totalChangeSize = 0
+ // Output change sizes
+ for (const [changeSize, changeTimestamp] of changes) {
+ totalChangeSize += parseInt(changeSize, 10)
+ const age = nowMs - new Date(changeTimestamp)
+ const ageInSeconds = Math.floor(age / 1000)
+ outputStream.write(`${projectId},change,${changeSize},${ageInSeconds}\n`)
+ }
+ outputStream.write(`${projectId},totalChangeSize,${totalChangeSize}\n`)
+ } catch (err) {
+ // Log error for this specific project but continue with others
+ console.error(`Error processing project ${projectId}:`, err)
+ }
+}
+
+async function main() {
+ const outputStream = process.stdout
+
+ // Write CSV header
+ outputStream.write('projectId,type,size,age\n')
+
+ try {
+ const scanPattern = 'snapshot:*'
+ console.log(`Scanning Redis for keys matching "${scanPattern}"...`)
+
+ for await (const keysBatch of scanRedisCluster(
+ rclientHistory,
+ scanPattern
+ )) {
+ for (const key of keysBatch) {
+ const parts = key.split(':')
+ if (parts.length !== 2 || parts[0] !== 'snapshot') {
+ console.warn(`Skipping malformed key: ${key}`)
+ continue
+ }
+ const projectId = parts[1]
+
+ // Call processProject directly and await it sequentially
+ await processProject(projectId, outputStream)
+ }
+ }
+
+ console.log('Finished processing keys.')
+ } catch (error) {
+ console.error('Error during Redis scan:', error)
+ } finally {
+ await disconnect()
+ console.log('Redis connections closed.')
+ }
+}
+
+main().catch(err => {
+ console.error('Unhandled error in main:', err)
+ process.exit(1)
+})
diff --git a/services/history-v1/storage/scripts/persist_and_expire_queues.sh b/services/history-v1/storage/scripts/persist_and_expire_queues.sh
new file mode 100644
index 0000000000..35b057f52c
--- /dev/null
+++ b/services/history-v1/storage/scripts/persist_and_expire_queues.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
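+# Persist any queued Redis chunks (spread over at most 270 seconds), then
+# expire Redis buffers that are due via the history-v1 API.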
+node storage/scripts/persist_redis_chunks.mjs --queue --max-time 270
+node storage/scripts/expire_redis_chunks.js --post-request
diff --git a/services/history-v1/storage/scripts/persist_redis_chunks.mjs b/services/history-v1/storage/scripts/persist_redis_chunks.mjs
new file mode 100644
index 0000000000..dd7e9f3a51
--- /dev/null
+++ b/services/history-v1/storage/scripts/persist_redis_chunks.mjs
@@ -0,0 +1,181 @@
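+// Persists buffered Redis chunks to the chunk store. By default each due
+// project is persisted directly; with --queue, flush requests are sent to the
+// history-v1 HTTP API instead, rate-limited across --max-time seconds.
+//
+// Usage:
+//   node storage/scripts/persist_redis_chunks.mjs [--dry-run] [--queue] [--max-time <seconds>] [--min-rate <requests-per-second>]
+//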
+import config from 'config'
+import PQueue from 'p-queue'
+import { fetchNothing } from '@overleaf/fetch-utils'
+import logger from '@overleaf/logger'
+import commandLineArgs from 'command-line-args'
+import * as redis from '../lib/redis.js'
+import knex from '../lib/knex.js'
+import knexReadOnly from '../lib/knex_read_only.js'
+import { client } from '../lib/mongodb.js'
+import { scanAndProcessDueItems } from '../lib/scan.js'
+import persistBuffer from '../lib/persist_buffer.js'
+import { claimPersistJob } from '../lib/chunk_store/redis.js'
+import { loadGlobalBlobs } from '../lib/blob_store/index.js'
+import { EventEmitter } from 'node:events'
+import { fileURLToPath } from 'node:url'
+
+// Something is registering 11 listeners, over the limit of 10, which generates
+// a lot of warning noise.
+EventEmitter.defaultMaxListeners = 11
+
+const rclient = redis.rclientHistory
+
+const optionDefinitions = [
+ { name: 'dry-run', alias: 'd', type: Boolean },
+ { name: 'queue', type: Boolean },
+ { name: 'max-time', type: Number },
+ { name: 'min-rate', type: Number, defaultValue: 1 },
+]
+const options = commandLineArgs(optionDefinitions)
+const DRY_RUN = options['dry-run'] || false
+const USE_QUEUE = options.queue || false
+const MAX_TIME = options['max-time'] || null
+const MIN_RATE = options['min-rate']
+const HISTORY_V1_URL = `http://${process.env.HISTORY_V1_HOST || 'localhost'}:${process.env.PORT || 3100}`
+let isShuttingDown = false
+
+logger.initialize('persist-redis-chunks')
+
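+// Claims the persist job for a project and flushes its entire Redis buffer to
+// the chunk store; the far-future limits force every buffered change to be
+// persisted.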
+async function persistProjectAction(projectId) {
+ const job = await claimPersistJob(projectId)
+ // Set limits to force us to persist all of the changes.
+ const farFuture = new Date()
+ farFuture.setTime(farFuture.getTime() + 7 * 24 * 3600 * 1000)
+ const limits = {
+ maxChanges: 0,
+ minChangeTimestamp: farFuture,
+ maxChangeTimestamp: farFuture,
+ autoResync: true,
+ }
+ await persistBuffer(projectId, limits)
+ if (job && job.close) {
+ await job.close()
+ }
+}
+
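+// Queue mode: instead of persisting locally, asks the history-v1 HTTP API to
+// flush the project.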
+async function requestProjectFlush(projectId) {
+ const job = await claimPersistJob(projectId)
+ logger.debug({ projectId }, 'sending project flush request')
+ const url = `${HISTORY_V1_URL}/api/projects/${projectId}/flush`
+ const credentials = Buffer.from(
+ `staging:${config.get('basicHttpAuth.password')}`
+ ).toString('base64')
+ await fetchNothing(url, {
+ method: 'POST',
+ headers: {
+ Authorization: `Basic ${credentials}`,
+ },
+ })
+ if (job && job.close) {
+ await job.close()
+ }
+}
+
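+// Dispatches flush requests for all queued projects, rate-limited so the
+// requests are spread across MAX_TIME seconds (and at least MIN_RATE per
+// second).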
+async function persistQueuedProjects(queuedProjects) {
+ const totalCount = queuedProjects.size
+ // Compute the rate at which we need to dispatch requests
+ const targetRate = MAX_TIME > 0 ? Math.ceil(totalCount / MAX_TIME) : 0
+ // Rate limit to spread the requests over the interval.
+ const queue = new PQueue({
+ intervalCap: Math.max(MIN_RATE, targetRate),
+ interval: 1000, // use a 1 second interval
+ })
+ logger.info(
+ { totalCount, targetRate, minRate: MIN_RATE, maxTime: MAX_TIME },
+ 'dispatching project flush requests'
+ )
+ const startTime = Date.now()
+ let dispatchedCount = 0
+ for (const projectId of queuedProjects) {
+ if (isShuttingDown) {
+ logger.info('Shutting down, stopping project flush requests')
+ queue.clear()
+ break
+ }
+ queue.add(async () => {
+ try {
+ await requestProjectFlush(projectId)
+ } catch (err) {
+ logger.error({ err, projectId }, 'error while flushing project')
+ }
+ })
+ dispatchedCount++
+ if (dispatchedCount % 1000 === 0) {
+ logger.info(
+ { count: dispatchedCount },
+ 'dispatched project flush requests'
+ )
+ }
+ await queue.onEmpty()
+ }
+ const elapsedTime = Math.floor((Date.now() - startTime) / 1000)
+ logger.info(
+ { count: totalCount, elapsedTime },
+ 'dispatched project flush requests'
+ )
+ await queue.onIdle()
+}
+
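+// Scans Redis for projects whose persist time is due. In --queue mode the
+// project ids are collected and flushed via HTTP afterwards; otherwise each
+// buffer is persisted directly.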
+async function runPersistChunks() {
+ const queuedProjects = new Set()
+
+ async function queueProjectAction(projectId) {
+ queuedProjects.add(projectId)
+ }
+
+ await loadGlobalBlobs()
+ await scanAndProcessDueItems(
+ rclient,
+ 'persistChunks',
+ 'persist-time',
+ USE_QUEUE ? queueProjectAction : persistProjectAction,
+ DRY_RUN
+ )
+
+ if (USE_QUEUE) {
+ if (isShuttingDown) {
+ logger.info('Shutting down, skipping queued project persistence')
+ return
+ }
+ logger.info(
+ { count: queuedProjects.size },
+ 'queued projects for persistence'
+ )
+ await persistQueuedProjects(queuedProjects)
+ }
+}
+
+async function main() {
+ try {
+ await runPersistChunks()
+ } catch (err) {
+ logger.fatal(
+ { err, taskName: 'persistChunks' },
+ 'Unhandled error in runPersistChunks'
+ )
+ process.exit(1)
+ } finally {
+ await redis.disconnect()
+ await client.close()
+ await knex.destroy()
+ await knexReadOnly.destroy()
+ }
+}
+
+function gracefulShutdown() {
+ if (isShuttingDown) {
+ return
+ }
+ isShuttingDown = true
+ logger.info({ isShuttingDown }, 'received shutdown signal, cleaning up...')
+}
+
+// Check if the module is being run directly
+const currentScriptPath = fileURLToPath(import.meta.url)
+if (process.argv[1] === currentScriptPath) {
+ process.on('SIGINT', gracefulShutdown)
+ process.on('SIGTERM', gracefulShutdown)
+ main()
+}
+
+export { runPersistChunks }
diff --git a/services/history-v1/storage/scripts/recover_doc_versions.js b/services/history-v1/storage/scripts/recover_doc_versions.js
index f121c60afd..650fb20324 100644
--- a/services/history-v1/storage/scripts/recover_doc_versions.js
+++ b/services/history-v1/storage/scripts/recover_doc_versions.js
@@ -279,7 +279,7 @@ async function processProject(project, summary) {
async function getHistoryDocVersions(project) {
const historyId = project.overleaf.history.id
- const chunk = await chunkStore.loadLatest(historyId)
+ const chunk = await chunkStore.loadLatest(historyId, { persistedOnly: true })
if (chunk == null) {
return []
}
diff --git a/services/history-v1/storage/scripts/recover_zip_from_backup.mjs b/services/history-v1/storage/scripts/recover_zip_from_backup.mjs
new file mode 100644
index 0000000000..4cf7051fcd
--- /dev/null
+++ b/services/history-v1/storage/scripts/recover_zip_from_backup.mjs
@@ -0,0 +1,233 @@
+// @ts-check
+import { loadGlobalBlobs } from '../lib/blob_store/index.js'
+import commandLineArgs from 'command-line-args'
+import assert from '../lib/assert.js'
+import fs from 'node:fs'
+import { setTimeout } from 'node:timers/promises'
+import {
+ archiveLatestChunk,
+ archiveRawProject,
+ BackupPersistorError,
+} from '../lib/backupArchiver.mjs'
+import knex from '../lib/knex.js'
+import { client } from '../lib/mongodb.js'
+import archiver from 'archiver'
+import Events from 'node:events'
+import { Chunk } from 'overleaf-editor-core'
+import _ from 'lodash'
+
+// Silence warning.
+Events.setMaxListeners(20)
+
+const SUPPORTED_MODES = ['raw', 'latest']
+
+// Pads the mode name to a fixed length for better alignment in output.
+const padModeName = _.partialRight(
+ _.padEnd,
+ Math.max(...SUPPORTED_MODES.map(mode => mode.length))
+)
+
+const SUPPORTED_MODES_HELP = {
+ raw: 'Retrieves all chunk and blob files from the project backup.',
+ latest: 'Retrieves the last backed-up state of the project.',
+}
+
+// outputFile must be visible to the shutdown function, which can run before outputFile has been assigned
+// eslint-disable-next-line prefer-const
+let outputFile
+
+/**
+ * Gracefully shutdown the process
+ * @param {number} code
+ */
+async function shutdown(code = 0) {
+ if (outputFile) {
+ outputFile.close()
+ }
+ await knex.destroy()
+ await client.close()
+ await setTimeout(1000)
+ process.exit(code)
+}
+
+function usage() {
+ console.log(
+ 'Usage: node recover_zip_from_backup.mjs --historyId=<historyId> --output=<output> [--mode=<mode>] [--verbose] [--useBackupGlobalBlobs]'
+ )
+ console.log(
+ '--useBackupGlobalBlobs can be used if the global blobs have not been restored from the backup yet.'
+ )
+ console.log('Supported modes: ' + SUPPORTED_MODES.join(', '))
+ SUPPORTED_MODES.forEach(mode => {
+ console.log(
+ ` --mode=${padModeName(mode)} - ${SUPPORTED_MODES_HELP[mode] || ''}`
+ )
+ })
+}
+
+/**
+ * @typedef {import('archiver').ZipArchive} ZipArchive
+ */
+
+/**
+ * @typedef {import('archiver').ProgressData} ProgressData
+ */
+
+/**
+ * @typedef {import('archiver').EntryData} EntryData
+ */
+
+/**
+ * @typedef {Object} ArchiverError
+ * @property {string} message
+ * @property {string} code
+ * @property {Object} data
+ */
+
+let historyId, help, mode, output, useBackupGlobalBlobs, verbose
+
+try {
+ ;({ historyId, help, mode, output, useBackupGlobalBlobs, verbose } =
+ commandLineArgs([
+ { name: 'historyId', type: String },
+ { name: 'output', type: String },
+ { name: 'mode', type: String, defaultValue: 'raw' },
+ { name: 'verbose', type: Boolean, defaultValue: false },
+ { name: 'useBackupGlobalBlobs', type: Boolean, defaultValue: false },
+ { name: 'help', type: Boolean },
+ ]))
+} catch (err) {
+ console.error(err instanceof Error ? err.message : err)
+ help = true
+}
+
+if (help) {
+ usage()
+ await shutdown(0)
+}
+
+if (!historyId) {
+ console.error('missing --historyId')
+ usage()
+ await shutdown(1)
+}
+
+if (!output) {
+ console.error('missing --output')
+ usage()
+ await shutdown(1)
+}
+
+try {
+ assert.projectId(historyId)
+} catch (error) {
+ console.error('Invalid history ID')
+ await shutdown(1)
+}
+
+if (!SUPPORTED_MODES.includes(mode)) {
+ console.error(
+ 'Invalid mode; supported modes are: ' + SUPPORTED_MODES.join(', ')
+ )
+ await shutdown(1)
+}
+
+await loadGlobalBlobs()
+
+outputFile = fs.createWriteStream(output)
+
+const archive = archiver.create('zip', {})
+
+archive.on('close', function () {
+ console.log(archive.pointer() + ' total bytes')
+ console.log(`Wrote ${output}`)
+ shutdown().catch(e => console.error('Error shutting down', e))
+})
+
+archive.on(
+ 'error',
+ /**
+ *
+ * @param {ArchiverError} e
+ */
+ function (e) {
+ console.error(`Error writing archive: ${e.message}`)
+ }
+)
+
+archive.on('end', function () {
+ console.log(`Wrote ${archive.pointer()} total bytes to ${output}`)
+ shutdown().catch(e => console.error('Error shutting down', e))
+})
+
+archive.on(
+ 'progress',
+ /**
+ *
+ * @param {ProgressData} progress
+ */
+ function (progress) {
+ if (verbose) {
+ console.log(
+ `${progress.entries.processed} processed out of ${progress.entries.total}`
+ )
+ }
+ }
+)
+
+archive.on(
+ 'entry',
+ /**
+ *
+ * @param {EntryData} entry
+ */
+ function (entry) {
+ if (verbose) {
+ console.log(`${entry.name} added`)
+ }
+ }
+)
+
+archive.on(
+ 'warning',
+ /**
+ *
+ * @param {ArchiverError} warning
+ */
+ function (warning) {
+ console.warn(`Warning encountered when writing archive: ${warning.message}`)
+ }
+)
+
+try {
+ switch (mode) {
+ case 'latest':
+ await archiveLatestChunk(archive, historyId, useBackupGlobalBlobs)
+ break
+ case 'raw':
+ default:
+ await archiveRawProject(archive, historyId, useBackupGlobalBlobs)
+ break
+ }
+ archive.pipe(outputFile)
+} catch (error) {
+ if (error instanceof BackupPersistorError) {
+ console.error(error.message)
+ }
+ if (error instanceof Chunk.NotPersistedError) {
+ console.error('Chunk not found. Project may not have been fully backed up.')
+ }
+ if (verbose) {
+ console.error(error)
+ } else {
+ console.error('Error encountered when writing archive')
+ }
+} finally {
+ await Promise.race([
+ archive.finalize(), // do not await here, otherwise the timeout race below has no effect
+ setTimeout(10000).then(() => {
+ console.error('Archive did not finalize in time')
+ return shutdown(1)
+ }),
+ ])
+}
diff --git a/services/history-v1/storage/scripts/redis.mjs b/services/history-v1/storage/scripts/redis.mjs
new file mode 100644
index 0000000000..ce9a39891f
--- /dev/null
+++ b/services/history-v1/storage/scripts/redis.mjs
@@ -0,0 +1,36 @@
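+// Runs a health check against one of the Redis databases defined in config.
+//
+// Usage:
+//   node storage/scripts/redis.mjs <db>
+//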
+import redis from '@overleaf/redis-wrapper'
+import config from 'config'
+
+// Get allowed Redis dbs from config
+const redisConfig = config.get('redis')
+const allowedDbs = Object.keys(redisConfig)
+
+// Get the Redis db from command line argument or use the first available db as default
+const db = process.argv[2]
+
+// Validate redis db
+if (!allowedDbs.includes(db)) {
+ if (db) {
+ console.error('Invalid redis db:', db)
+ }
+ console.error(`Usage: node redis.mjs [${allowedDbs.join('|')}]`)
+ process.exit(1)
+}
+
+// Get redis options based on command line argument
+const redisOptions = config.get(`redis.${db}`)
+console.log('Using redis db:', db)
+console.log('REDIS CONFIG', {
+ ...redisOptions,
+ password: '*'.repeat(redisOptions.password?.length),
+})
+const rclient = redis.createClient(redisOptions)
+
+try {
+ await rclient.healthCheck()
+ console.log('REDIS HEALTHCHECK SUCCEEDED')
+} catch (error) {
+ console.error('REDIS HEALTHCHECK FAILED', error)
+} finally {
+ await rclient.quit()
+}
diff --git a/services/history-v1/storage/scripts/remove_backed_up_blobs.mjs b/services/history-v1/storage/scripts/remove_backed_up_blobs.mjs
new file mode 100644
index 0000000000..0fa72011ab
--- /dev/null
+++ b/services/history-v1/storage/scripts/remove_backed_up_blobs.mjs
@@ -0,0 +1,104 @@
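+// Removes the blob hashes listed in a CSV (projectId,path per row) from each
+// project's backedUpBlobs record. Dry-run by default; pass --commit to apply.
+//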
+// @ts-check
+import { readFileSync } from 'node:fs'
+import commandLineArgs from 'command-line-args'
+import { client } from '../lib/mongodb.js'
+import {
+ getBackedUpBlobHashes,
+ unsetBackedUpBlobHashes,
+} from '../lib/backup_store/index.js'
+
+let gracefulShutdownInitiated = false
+
+// Parse command line arguments
+const args = commandLineArgs([
+ { name: 'input', type: String, alias: 'i', defaultOption: true },
+ { name: 'commit', type: Boolean, defaultValue: false },
+])
+
+if (!args.input) {
+ console.error(
+ 'Usage: node remove_backed_up_blobs.mjs --input <csv-file> [--commit]'
+ )
+ process.exit(1)
+}
+
+if (!args.commit) {
+ console.log('Running in dry-run mode. Use --commit to apply changes.')
+}
+
+// Signal handling
+process.on('SIGINT', handleSignal)
+process.on('SIGTERM', handleSignal)
+
+function handleSignal() {
+ console.warn('Graceful shutdown initiated')
+ gracefulShutdownInitiated = true
+}
+
+// Process CSV and remove blobs
+async function main() {
+ const projectBlobs = new Map()
+ const lines = readFileSync(args.input, 'utf8').split('\n')
+ const SHA1_HEX_REGEX = /^[a-f0-9]{40}$/
+
+ // Skip header
+ for (const line of lines.slice(1)) {
+ if (!line.trim() || gracefulShutdownInitiated) break
+
+ const [projectId, path] = line.split(',')
+ const pathParts = path.split('/')
+ const hash = pathParts[3] + pathParts[4]
+
+ if (!SHA1_HEX_REGEX.test(hash)) {
+ console.warn(`Invalid SHA1 hash for project ${projectId}: ${hash}`)
+ continue
+ }
+
+ if (!projectBlobs.has(projectId)) {
+ projectBlobs.set(projectId, new Set())
+ }
+ projectBlobs.get(projectId).add(hash)
+ }
+
+ // Process each project
+ for (const [projectId, hashes] of projectBlobs) {
+ if (gracefulShutdownInitiated) break
+
+ if (!args.commit) {
+ console.log(
+ `DRY-RUN: would remove ${hashes.size} blobs from project ${projectId}`
+ )
+ continue
+ }
+
+ try {
+ const originalHashes = await getBackedUpBlobHashes(projectId)
+ if (originalHashes.size === 0) {
+ continue
+ }
+ const result = await unsetBackedUpBlobHashes(
+ projectId,
+ Array.from(hashes)
+ )
+ if (result) {
+ console.log(
+ `Project ${projectId}: want to remove ${hashes.size}, removed ${originalHashes.size - result.blobs.length}, ${result.blobs.length} remaining`
+ )
+ }
+ } catch (err) {
+ console.error(`Error updating project ${projectId}:`, err)
+ }
+ }
+}
+
+// Run the script
+main()
+ .catch(err => {
+ console.error('Fatal error:', err)
+ process.exitCode = 1
+ })
+ .finally(() => {
+ client
+ .close()
+ .catch(err => console.error('Error closing MongoDB connection:', err))
+ })
diff --git a/services/history-v1/storage/scripts/remove_backup_blobs_from_wrong_path.mjs b/services/history-v1/storage/scripts/remove_backup_blobs_from_wrong_path.mjs
new file mode 100644
index 0000000000..119da2f308
--- /dev/null
+++ b/services/history-v1/storage/scripts/remove_backup_blobs_from_wrong_path.mjs
@@ -0,0 +1,221 @@
+// @ts-check
+
+/**
+ * This script is used to remove blobs that have been backed up under the project ID
+ * instead of the history ID (where those are different).
+ *
+ * This script reads a CSV file with the following format:
+ * ```
+ * project_id,hash
+ * <project_id>,<hash>
+ * ```
+ *
+ * The header row is optional. All rows will be checked for conformance to the format.
+ */
+
+import commandLineArgs from 'command-line-args'
+import { backupPersistor, projectBlobsBucket } from '../lib/backupPersistor.mjs'
+import { makeProjectKey } from '../lib/blob_store/index.js'
+import fs from 'node:fs'
+import assert from '../lib/assert.js'
+import { client } from '../lib/mongodb.js'
+import { verifyBlobs } from '../lib/backupVerifier.mjs'
+import { setTimeout } from 'node:timers/promises'
+import { getHistoryId } from '../lib/backup_store/index.js'
+
+const argsSchema = [
+ {
+ name: 'input',
+ type: String,
+ },
+ {
+ name: 'commit',
+ type: Boolean,
+ },
+ {
+ name: 'header',
+ type: Boolean,
+ },
+ {
+ name: 'force',
+ type: Boolean,
+ },
+ {
+ name: 'verbose',
+ type: Boolean,
+ },
+]
+
+const args = commandLineArgs(argsSchema)
+
+async function gracefulClose(code = 0) {
+ await client.close()
+ process.exit(code)
+}
+
+/**
+ *
+ * @param {(value: unknown) => void} fn
+ * @param {unknown} value
+ * @return {boolean}
+ */
+function not(fn, value) {
+ try {
+ fn(value)
+ return false
+ } catch {
+ return true
+ }
+}
+
+/**
+ *
+ * @param {string} row
+ * @return {{projectId: string, hash: string}}
+ */
+function parseCSVRow(row) {
+ const [projectId, hash] = row.split(',')
+ assert.mongoId(projectId, `invalid projectId ${projectId}`)
+ assert.blobHash(hash, `invalid hash ${hash}`)
+ return { projectId, hash }
+}
+
+/**
+ *
+ * @param {string} path
+ * @param {boolean} hasHeader
+ * @return {AsyncGenerator<{projectId: string, hash: string}, void, *>}
+ */
+async function* readCSV(path, hasHeader) {
+ let seenHeader = !hasHeader
+ let fh
+ try {
+ fh = await fs.promises.open(path, 'r')
+ } catch (error) {
+ console.error(`Could not open file: ${error}`)
+ return await gracefulClose(1)
+ }
+ for await (const line of fh.readLines()) {
+ if (!seenHeader) {
+ const [first, second] = line.split(',')
+ const noDataInHeader =
+ not(assert.mongoId, first) && not(assert.blobHash, second)
+ if (!noDataInHeader) {
+ console.error('Data found in header row')
+ return await gracefulClose(1)
+ }
+ seenHeader = true
+ continue
+ }
+ try {
+ yield parseCSVRow(line)
+ } catch (error) {
+ console.error(error instanceof Error ? error.message : error)
+ console.info(`Skipping invalid row: ${line}`)
+ }
+ }
+}
+
+function usage() {
+ console.info(
+ 'Usage: node remove_backup_blobs_from_wrong_path.mjs --input <csv-file> [--commit] [--header] [--force] [--verbose]'
+ )
+}
+
+if (!args.input) {
+ console.error('--input was missing')
+ usage()
+ await gracefulClose(1)
+}
+
+/**
+ *
+ * @param {string} projectId
+ * @param {string} hash
+ * @return {Promise<void>}
+ */
+async function deleteBlob(projectId, hash) {
+ const path = makeProjectKey(projectId, hash)
+ if (args.commit) {
+ await backupPersistor.deleteObject(projectBlobsBucket, path)
+ } else {
+ console.log(`DELETE: ${path}`)
+ }
+}
+
+/**
+ *
+ * @param {string} projectId
+ * @param {string} hash
+ * @return {Promise<void>}
+ */
+async function canDeleteBlob(projectId, hash) {
+ let historyId
+ try {
+ historyId = await getHistoryId(projectId)
+ } catch (error) {
+ if (args.verbose) {
+ console.error(error)
+ }
+ throw new Error(`No history ID found for project ${projectId}, skipping`)
+ }
+ if (historyId === projectId) {
+ throw new Error(
+ `Project ID and history ID are the same for ${projectId} - use --force to delete anyway`
+ )
+ }
+
+ // TODO: fix assert.postgresId to handle integers better and then stop coercing to string below
+ assert.postgresId(
+ `${historyId}`,
+ `History ID ${historyId} does not appear to be for a postgres project`
+ )
+
+ try {
+ await verifyBlobs(`${historyId}`, [hash])
+ } catch (error) {
+ if (args.verbose) {
+ console.error(error)
+ }
+ throw new Error(
+ `Blob ${hash} is not backed up for project ${projectId} - use --force to delete anyway`
+ )
+ }
+}
+
+if (!args.commit) {
+ console.log('DRY RUN: provide --commit to perform operations')
+}
+
+if (args.force) {
+ console.log(
+ 'WARNING: --force is enabled, blobs will be deleted regardless of backup status'
+ )
+ await setTimeout(5_000)
+}
+
+let deleted = 0
+let errors = 0
+
+for await (const { projectId, hash } of readCSV(args.input, args.header)) {
+ if (!args.force) {
+ try {
+ await canDeleteBlob(projectId, hash)
+ } catch (error) {
+ console.error(error instanceof Error ? error.message : error)
+ continue
+ }
+ }
+ try {
+ await deleteBlob(projectId, hash)
+ deleted++
+ } catch (error) {
+ errors++
+ console.error(error)
+ }
+}
+
+console.log(`Deleted: ${deleted}`)
+console.log(`Errors: ${errors}`)
+
+await gracefulClose()
diff --git a/services/history-v1/storage/scripts/show.mjs b/services/history-v1/storage/scripts/show.mjs
new file mode 100644
index 0000000000..51697dc38f
--- /dev/null
+++ b/services/history-v1/storage/scripts/show.mjs
@@ -0,0 +1,275 @@
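+// Inspects a project's history: lists its chunks, shows a chunk at a given
+// version, or shows a blob, reading from local storage or the remote backup.
+//
+// Usage:
+//   node storage/scripts/show.mjs --historyId <id> [--version <v>] [--blob <hash>] [--remote] [--keep]
+//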
+import commandLineArgs from 'command-line-args'
+import {
+ loadAtVersion,
+ getChunkMetadataForVersion,
+ getProjectChunksFromVersion,
+} from '../lib/chunk_store/index.js'
+import { client } from '../lib/mongodb.js'
+import knex from '../lib/knex.js'
+import redis from '../lib/redis.js'
+import {
+ loadGlobalBlobs,
+ BlobStore,
+ makeProjectKey,
+} from '../lib/blob_store/index.js'
+import { TextDecoder } from 'node:util'
+import {
+ backupPersistor,
+ chunksBucket,
+ projectBlobsBucket,
+} from '../lib/backupPersistor.mjs'
+import fs from 'node:fs'
+import { pipeline } from 'node:stream/promises'
+import os from 'node:os'
+import path from 'node:path'
+import { createHash } from 'node:crypto'
+import projectKey from '../lib/project_key.js'
+import { createGunzip } from 'node:zlib'
+import { text } from 'node:stream/consumers'
+
+const optionDefinitions = [
+ { name: 'historyId', alias: 'p', type: String },
+ { name: 'version', alias: 'v', type: Number },
+ { name: 'blob', alias: 'b', type: String },
+ { name: 'remote', alias: 'r', type: Boolean },
+ { name: 'keep', alias: 'k', type: Boolean },
+]
+
+function makeChunkKey(projectId, startVersion) {
+ return path.join(projectKey.format(projectId), projectKey.pad(startVersion))
+}
+
+async function listChunks(historyId) {
+ for await (const chunkRecord of getProjectChunksFromVersion(historyId, 0)) {
+ console.log('Chunk record:', chunkRecord)
+ }
+}
+
+async function fetchChunkLocal(historyId, version) {
+ const chunkRecord = await getChunkMetadataForVersion(historyId, version)
+ const chunk = await loadAtVersion(historyId, version)
+ const persistedChunk = await loadAtVersion(historyId, version, {
+ persistedOnly: true,
+ })
+ return {
+ key: version,
+ chunk,
+ persistedChunk,
+ metadata: chunkRecord,
+ source: 'local storage',
+ }
+}
+
+async function fetchChunkRemote(historyId, version) {
+ const chunkRecord = await getChunkMetadataForVersion(historyId, version)
+ const startVersion = chunkRecord.startVersion
+ const key = makeChunkKey(historyId, startVersion)
+ const backupPersistorForProject = await backupPersistor.forProject(
+ chunksBucket,
+ key
+ )
+ const backupChunkStream = await backupPersistorForProject.getObjectStream(
+ chunksBucket,
+ key
+ )
+ const backupStr = await text(backupChunkStream.pipe(createGunzip()))
+ return {
+ key,
+ chunk: JSON.parse(backupStr),
+ metadata: chunkRecord,
+ source: 'remote backup',
+ }
+}
+
+async function displayChunk(historyId, version, options) {
+ const { key, chunk, persistedChunk, metadata, source } = await (options.remote
+ ? fetchChunkRemote(historyId, version)
+ : fetchChunkLocal(historyId, version))
+ console.log('Source:', source)
+ console.log('Chunk record', metadata)
+ console.log('Key', key)
+ // console.log('Number of changes', chunk.getChanges().length)
+ console.log(JSON.stringify(chunk))
+ if (
+ persistedChunk &&
+ persistedChunk.getChanges().length !== chunk.getChanges().length
+ ) {
+ console.warn(
+ 'Warning: Local chunk and persisted chunk have different number of changes:',
+ chunk.getChanges().length,
+ 'local (including buffer) vs',
+ persistedChunk.getChanges().length,
+ 'persisted'
+ )
+ }
+}
+
+async function fetchBlobRemote(historyId, blobHash) {
+ const backupPersistorForProject = await backupPersistor.forProject(
+ projectBlobsBucket,
+ makeProjectKey(historyId, '')
+ )
+ const blobKey = makeProjectKey(historyId, blobHash)
+ return {
+ stream: await backupPersistorForProject.getObjectStream(
+ projectBlobsBucket,
+ blobKey,
+ { autoGunzip: true }
+ ),
+ metadata: { hash: blobHash },
+ source: 'remote backup',
+ }
+}
+
+async function fetchBlobLocal(historyId, blobHash) {
+ const blobStore = new BlobStore(historyId)
+ const blob = await blobStore.getBlob(blobHash)
+ if (!blob) throw new Error(`Blob ${blobHash} not found`)
+ return {
+ stream: await blobStore.getStream(blobHash),
+ metadata: blob,
+ source: 'local storage',
+ }
+}
+
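+// Prints blob metadata, computes the git-style SHA-1 of the downloaded file,
+// and shows a short text or hex preview depending on whether the content is
+// valid UTF-8.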
+async function displayBlobContent(filepath, metadata, source, blobHash) {
+ console.log('Source:', source)
+ console.log('Blob metadata:', metadata)
+
+ // Compute git hash using streaming
+ const stat = fs.statSync(filepath)
+ const header = `blob ${stat.size}\0`
+ const hash = createHash('sha1')
+ hash.update(header)
+
+ const hashStream = fs.createReadStream(filepath)
+ for await (const chunk of hashStream) {
+ hash.update(chunk)
+ }
+ const gitHash = hash.digest('hex')
+
+ // Check content type and display preview
+ const fd = fs.openSync(filepath, 'r')
+ try {
+ const headBuf = Buffer.alloc(16)
+ const tailBuf = Buffer.alloc(16)
+
+ try {
+ // Stream through TextDecoderStream to check for valid UTF-8
+ const textStream = fs.createReadStream(filepath)
+ const decoder = new TextDecoder('utf-8', { fatal: true })
+ for await (const chunk of textStream) {
+ decoder.decode(chunk, { stream: true })
+ }
+ decoder.decode()
+ // If we get here, it's valid UTF-8
+ if (stat.size <= 1024) {
+ console.log('Content (text):', fs.readFileSync(filepath, 'utf8'))
+ } else {
+ console.log('Content (text, truncated):')
+ console.log(` Length: ${stat.size} bytes`)
+ fs.readSync(fd, headBuf, 0, 16, 0)
+ fs.readSync(fd, tailBuf, 0, 16, stat.size - 16)
+ console.log(
+ ' Content:',
+ headBuf.toString('utf8') +
+ ' ...(truncated)... ' +
+ tailBuf.toString('utf8')
+ )
+ }
+ } catch (e) {
+ // Binary content - show head and tail
+ console.log('Content (binary):')
+ console.log(` Length: ${stat.size} bytes`)
+
+ if (stat.size <= 32) {
+ // Small file - read it all
+ const buf = Buffer.alloc(stat.size)
+ fs.readSync(fd, buf, 0, stat.size, 0)
+ const hexBytes = buf.toString('hex').match(/../g).join(' ')
+ console.log(' Bytes:', hexBytes)
+ } else {
+ // Read tail for large files
+ fs.readSync(fd, headBuf, 0, 16, 0)
+ fs.readSync(fd, tailBuf, 0, 16, stat.size - 16)
+ const headHex = headBuf.toString('hex').match(/../g).join(' ')
+ const tailHex = tailBuf.toString('hex').match(/../g).join(' ')
+ console.log(' Bytes:', headHex + ' ... ' + tailHex)
+ }
+ console.log(' Git-style SHA1:', gitHash)
+ if (gitHash !== blobHash) {
+ console.log(' Warning: Git hash differs from blob hash!')
+ console.log(' Blob hash:', blobHash)
+ }
+ }
+ } finally {
+ fs.closeSync(fd)
+ }
+}
+
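+// Creates a temporary directory, runs fn with it, and removes it afterwards
+// unless options.keep is set.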
+async function withTempDir(prefix, fn, options = {}) {
+ const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), prefix))
+ try {
+ return await Promise.resolve(fn(tmpDir))
+ } finally {
+ if (!options.keep) {
+ fs.rmSync(tmpDir, { recursive: true, force: true })
+ } else {
+ console.log('Keeping temporary file:', path.join(tmpDir, 'blob'))
+ }
+ }
+}
+
+async function displayBlob(historyId, blobHash, options) {
+ try {
+ const { stream, metadata, source } = await (options.remote
+ ? fetchBlobRemote(historyId, blobHash)
+ : fetchBlobLocal(historyId, blobHash))
+
+ await withTempDir(
+ 'blob-show-',
+ async tmpDir => {
+ const tmpPath = path.join(tmpDir, 'blob')
+ await pipeline(stream, fs.createWriteStream(tmpPath))
+ await displayBlobContent(tmpPath, metadata, source, blobHash)
+ },
+ { keep: options.keep }
+ )
+ } catch (err) {
+ if (err.code === 'NoSuchKey') {
+ throw new Error(`Blob ${blobHash} not found in backup`)
+ }
+ throw err
+ }
+}
+
+async function main() {
+ const { historyId, version, blob, remote, keep } =
+ commandLineArgs(optionDefinitions)
+ if (!historyId) {
+ console.error('Error: --historyId is required.')
+ process.exit(1)
+ }
+ await loadGlobalBlobs()
+ if (version != null) {
+ await displayChunk(historyId, version, { remote })
+ } else if (blob != null) {
+ await displayBlob(historyId, blob, { remote, keep })
+ } else {
+ await listChunks(historyId)
+ }
+}
+
+main()
+ .then(() => console.log('Done.'))
+ .catch(err => {
+ console.error('Error:', err)
+ process.exit(1)
+ })
+ .finally(() => {
+ knex.destroy().catch(err => console.error('Error closing Postgres:', err))
+ client.close().catch(err => console.error('Error closing MongoDB:', err))
+ redis
+ .disconnect()
+ .catch(err => console.error('Error disconnecting Redis:', err))
+ })
diff --git a/services/history-v1/storage/scripts/show_buffer.js b/services/history-v1/storage/scripts/show_buffer.js
new file mode 100644
index 0000000000..1d80ee227d
--- /dev/null
+++ b/services/history-v1/storage/scripts/show_buffer.js
@@ -0,0 +1,117 @@
+#!/usr/bin/env node
+// @ts-check
+
+const { rclientHistory: rclient } = require('../lib/redis')
+const { keySchema } = require('../lib/chunk_store/redis')
+const commandLineArgs = require('command-line-args')
+
+const optionDefinitions = [
+ { name: 'historyId', type: String, defaultOption: true },
+]
+
+// Column width for key display alignment; can be overridden with COL_WIDTH env variable
+const COLUMN_WIDTH = process.env.COL_WIDTH
+ ? parseInt(process.env.COL_WIDTH, 10)
+ : 45
+
+let options
+try {
+ options = commandLineArgs(optionDefinitions)
+} catch (e) {
+ console.error(
+ 'Error parsing command line arguments:',
+ e instanceof Error ? e.message : String(e)
+ )
+ console.error('Usage: ./show_buffer.js <historyId>')
+ process.exit(1)
+}
+
+const { historyId } = options
+
+if (!historyId) {
+ console.error('Usage: ./show_buffer.js <historyId>')
+ process.exit(1)
+}
+
+function format(str, indent = COLUMN_WIDTH + 2) {
+ const lines = str.split('\n')
+ for (let i = 1; i < lines.length; i++) {
+ lines[i] = ' '.repeat(indent) + lines[i]
+ }
+ return lines.join('\n')
+}
+
+async function displayKeyValue(
+ rclient,
+ key,
+ { parseJson = false, formatDate = false } = {}
+) {
+ const value = await rclient.get(key)
+ let displayValue = '(nil)'
+ if (value) {
+ if (parseJson) {
+ try {
+ displayValue = format(JSON.stringify(JSON.parse(value), null, 2))
+ } catch (e) {
+ displayValue = ` Raw value: ${value}`
+ }
+ } else if (formatDate) {
+ const ts = parseInt(value, 10)
+ displayValue = `${new Date(ts).toISOString()} (${value})`
+ } else {
+ displayValue = value
+ }
+ }
+ console.log(`${key.padStart(COLUMN_WIDTH)}: ${displayValue}`)
+}
+
+async function displayBuffer(projectId) {
+ console.log(`Buffer for history ID: ${projectId}`)
+ console.log('--------------------------------------------------')
+
+ try {
+ const headKey = keySchema.head({ projectId })
+ const headVersionKey = keySchema.headVersion({ projectId })
+ const persistedVersionKey = keySchema.persistedVersion({ projectId })
+ const expireTimeKey = keySchema.expireTime({ projectId })
+ const persistTimeKey = keySchema.persistTime({ projectId })
+ const changesKey = keySchema.changes({ projectId })
+
+ await displayKeyValue(rclient, headKey, { parseJson: true })
+ await displayKeyValue(rclient, headVersionKey)
+ await displayKeyValue(rclient, persistedVersionKey)
+ await displayKeyValue(rclient, expireTimeKey, { formatDate: true })
+ await displayKeyValue(rclient, persistTimeKey, { formatDate: true })
+
+ const changesList = await rclient.lrange(changesKey, 0, -1)
+
+ // changes (a Redis list, displayed separately from the simple keys above)
+ let changesListDisplay = '(nil)'
+ if (changesList) {
+ changesListDisplay = changesList.length
+ ? format(
+ changesList
+ .map((change, index) => `[${index}]: ${change}`)
+ .join('\n')
+ )
+ : '(empty list)'
+ }
+ console.log(`${changesKey.padStart(COLUMN_WIDTH)}: ${changesListDisplay}`)
+ } catch (error) {
+ console.error('Error fetching data from Redis:', error)
+ throw error
+ }
+}
+
+;(async () => {
+ let errorOccurred = false
+ try {
+ await displayBuffer(historyId)
+ } catch (error) {
+ errorOccurred = true
+ } finally {
+ rclient.quit(() => {
+ process.exit(errorOccurred ? 1 : 0)
+ })
+ }
+})()
diff --git a/services/history-v1/storage/scripts/verify_backed_up_blobs.mjs b/services/history-v1/storage/scripts/verify_backed_up_blobs.mjs
new file mode 100644
index 0000000000..257238aad4
--- /dev/null
+++ b/services/history-v1/storage/scripts/verify_backed_up_blobs.mjs
@@ -0,0 +1,153 @@
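+// Writes a CSV (projectId,lastUpdated,path) listing the expected backup path
+// of every non-global blob recorded in backedUpBlobs, for later verification.
+//
+// Usage:
+//   node storage/scripts/verify_backed_up_blobs.mjs --output <file> [--BATCH_RANGE_START <date>] [--BATCH_RANGE_END <date>]
+//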
+// @ts-check
+import { ObjectId } from 'mongodb'
+import knex from '../lib/knex.js'
+import {
+ batchedUpdate,
+ objectIdFromInput,
+ READ_PREFERENCE_SECONDARY,
+} from '@overleaf/mongo-utils/batchedUpdate.js'
+import {
+ GLOBAL_BLOBS,
+ loadGlobalBlobs,
+ makeProjectKey,
+} from '../lib/blob_store/index.js'
+import {
+ backedUpBlobs as backedUpBlobsCollection,
+ db,
+ client,
+} from '../lib/mongodb.js'
+import redis from '../lib/redis.js'
+import commandLineArgs from 'command-line-args'
+import fs from 'node:fs'
+
+const projectsCollection = db.collection('projects')
+
+// Enable caching for ObjectId.toString()
+ObjectId.cacheHexString = true
+
+function parseArgs() {
+ const PUBLIC_LAUNCH_DATE = new Date('2012-01-01T00:00:00Z')
+ const args = commandLineArgs([
+ {
+ name: 'BATCH_RANGE_START',
+ type: String,
+ defaultValue: PUBLIC_LAUNCH_DATE.toISOString(),
+ },
+ {
+ name: 'BATCH_RANGE_END',
+ type: String,
+ defaultValue: new Date().toISOString(),
+ },
+ {
+ name: 'output',
+ type: String,
+ alias: 'o',
+ },
+ ])
+ const BATCH_RANGE_START = objectIdFromInput(
+ args['BATCH_RANGE_START']
+ ).toString()
+ const BATCH_RANGE_END = objectIdFromInput(args['BATCH_RANGE_END']).toString()
+ if (!args['output']) {
+ throw new Error('missing --output')
+ }
+ const OUTPUT_STREAM = fs.createWriteStream(args['output'])
+
+ return {
+ BATCH_RANGE_START,
+ BATCH_RANGE_END,
+ OUTPUT_STREAM,
+ }
+}
+
+const { BATCH_RANGE_START, BATCH_RANGE_END, OUTPUT_STREAM } = parseArgs()
+
+// We need to handle the start and end differently as ids of deleted projects are created at time of deletion.
+if (process.env.BATCH_RANGE_START || process.env.BATCH_RANGE_END) {
+ throw new Error('use --BATCH_RANGE_START and --BATCH_RANGE_END')
+}
+
+let gracefulShutdownInitiated = false
+
+process.on('SIGINT', handleSignal)
+process.on('SIGTERM', handleSignal)
+
+function handleSignal() {
+ gracefulShutdownInitiated = true
+ console.warn('graceful shutdown initiated, draining queue')
+}
+
+async function processBatch(batch) {
+ if (gracefulShutdownInitiated) {
+ throw new Error('graceful shutdown: aborting batch processing')
+ }
+
+ const N = batch.length
+ const firstId = batch[0]._id
+ const lastId = batch[N - 1]._id
+ const projectCursor = await projectsCollection.find(
+ { _id: { $gte: firstId, $lte: lastId } },
+ {
+ projection: { _id: 1, 'overleaf.history.id': 1, lastUpdated: 1 },
+ readPreference: READ_PREFERENCE_SECONDARY,
+ }
+ )
+ const projectMap = new Map()
+ for await (const project of projectCursor) {
+ projectMap.set(project._id.toString(), project)
+ }
+ for (const project of batch) {
+ const projectId = project._id.toString()
+ const projectRecord = projectMap.get(projectId)
+ if (!projectRecord) {
+ console.error(`project not found: ${projectId}`)
+ continue
+ }
+ if (!projectRecord.overleaf?.history?.id) {
+ console.error(`project missing history: ${projectId}`)
+ continue
+ }
+ const historyId = projectRecord.overleaf.history.id.toString()
+ const prefix = `${projectId},${projectRecord.lastUpdated.toISOString()},`
+ const hashes = project.blobs.map(blob => blob.toString('hex'))
+ const projectBlobHashes = hashes.filter(hash => !GLOBAL_BLOBS.has(hash))
+ if (projectBlobHashes.length < hashes.length) {
+ console.warn(
+ `project ${projectId} has ${hashes.length - projectBlobHashes.length} global blobs`
+ )
+ }
+ const rows = projectBlobHashes.map(
+ hash => prefix + makeProjectKey(historyId, hash) + '\n'
+ )
+ OUTPUT_STREAM.write(rows.join(''))
+ }
+}
+
+async function main() {
+ await loadGlobalBlobs()
+ OUTPUT_STREAM.write('projectId,lastUpdated,path\n')
+ await batchedUpdate(
+ backedUpBlobsCollection,
+ {},
+ processBatch,
+ {},
+ {},
+ { BATCH_RANGE_START, BATCH_RANGE_END }
+ )
+}
+
+main()
+ .then(() => console.log('Done.'))
+ .catch(err => {
+ console.error('Error:', err)
+ process.exitCode = 1
+ })
+ .finally(() => {
+ knex.destroy().catch(err => {
+ console.error('Error closing Postgres connection:', err)
+ })
+ client.close().catch(err => console.error('Error closing MongoDB:', err))
+ redis.disconnect().catch(err => {
+ console.error('Error disconnecting Redis:', err)
+ })
+ })
diff --git a/services/history-v1/storage/scripts/verify_blob_backed_up_by_path_bulk.mjs b/services/history-v1/storage/scripts/verify_blob_backed_up_by_path_bulk.mjs
new file mode 100644
index 0000000000..c699b61b13
--- /dev/null
+++ b/services/history-v1/storage/scripts/verify_blob_backed_up_by_path_bulk.mjs
@@ -0,0 +1,177 @@
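+// Reads a CSV whose first column is a backup blob path and checks that each
+// blob exists in the backup bucket, reporting per-error counts at the end.
+//
+// Usage:
+//   node storage/scripts/verify_blob_backed_up_by_path_bulk.mjs --input <file> [--verbose]
+//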
+import fs from 'node:fs'
+import { makeProjectKey } from '../lib/blob_store/index.js'
+import { backupPersistor, projectBlobsBucket } from '../lib/backupPersistor.mjs'
+import { NotFoundError } from '@overleaf/object-persistor/src/Errors.js'
+import commandLineArgs from 'command-line-args'
+import OError from '@overleaf/o-error'
+import assert from '../lib/assert.js'
+import { client, projects } from '../lib/mongodb.js'
+import { ObjectId } from 'mongodb'
+import { setTimeout } from 'node:timers/promises'
+
+const { input, verbose } = commandLineArgs([
+ { name: 'input', type: String },
+ { name: 'verbose', type: Boolean, defaultValue: false },
+])
+
+function parseCSVRow(row) {
+ const [path] = row.split(',')
+ const pathSegments = path.split('/')
+ const historyId = `${pathSegments[0]}${pathSegments[1]}${pathSegments[2]}`
+ .split('')
+ .reverse()
+ .join('')
+
+ return { historyId, path, hash: `${pathSegments[3]}${pathSegments[4]}` }
+}
+
+async function* readCSV(path) {
+ let fh
+ try {
+ fh = await fs.promises.open(path, 'r')
+ } catch (error) {
+ console.error(`Could not open file: ${error}`)
+ throw error
+ }
+ for await (const line of fh.readLines()) {
+ try {
+ const row = parseCSVRow(line)
+ yield row
+ } catch (error) {
+ console.error(error instanceof Error ? error.message : error)
+ console.log(`Skipping invalid row: ${line}`)
+ }
+ }
+}
+
+class MissingDEKError extends OError {}
+class InvalidHistoryIdError extends OError {}
+class MissingProjectError extends OError {}
+class MissingBlobError extends OError {}
+
+async function getProjectPersistor(historyId) {
+ try {
+ return await backupPersistor.forProjectRO(
+ projectBlobsBucket,
+ makeProjectKey(historyId, '')
+ )
+ } catch (err) {
+ if (err instanceof NotFoundError) {
+ throw new MissingDEKError('dek does not exist', { historyId }, err)
+ }
+ throw err
+ }
+}
+
+async function checkBlobExists(path, historyId) {
+ const persistor = await getProjectPersistor(historyId)
+ return await persistor.getObjectSize(projectBlobsBucket, path)
+}
+
+let total = 0
+const errors = {
+ invalidProjectId: 0,
+ notBackedUpProjectId: 0,
+ missingBlob: 0,
+ notInMongo: 0,
+ unknown: 0,
+}
+
+const notInMongoProjectIds = new Set()
+const notBackedUpProjectIds = new Set()
+
+let stopping = false
+
+process.on('SIGTERM', () => {
+ console.log('SIGTERM received')
+ stopping = true
+})
+
+process.on('SIGINT', () => {
+ console.log('SIGINT received')
+ stopping = true
+})
+
+/**
+ *
+ * @param {string} historyId
+ * @param {string} path
+ * @param {string} hash
+ * @return {Promise<void>}
+ */
+async function checkPath(historyId, path, hash) {
+ try {
+ assert.mongoId(historyId)
+ } catch (error) {
+ throw new InvalidHistoryIdError('invalid history id', { historyId })
+ }
+ if (notInMongoProjectIds.has(historyId)) {
+ throw new MissingProjectError('project not in mongo', { historyId })
+ }
+ if (notBackedUpProjectIds.has(historyId)) {
+ throw new MissingDEKError('project not backed up', { historyId })
+ }
+
+ const project = await projects.findOne({ _id: new ObjectId(historyId) })
+ if (!project) {
+ notInMongoProjectIds.add(historyId)
+ throw new MissingProjectError('project not in mongo', { historyId })
+ }
+ try {
+ await checkBlobExists(path, historyId)
+ } catch (error) {
+ if (error instanceof NotFoundError) {
+ throw new MissingBlobError('missing blob', { historyId, hash })
+ }
+ if (error instanceof MissingDEKError) {
+ notBackedUpProjectIds.add(historyId)
+ }
+ throw error
+ }
+}
+
+for await (const line of readCSV(input)) {
+ if (stopping) break
+ total++
+ if (total % 10_000 === 0) {
+ console.log(`checked ${total}`)
+ }
+ const { historyId, path, hash } = line
+ try {
+ await checkPath(historyId, path, hash)
+ if (verbose) {
+ console.log(`✓ Project ${historyId} has ${hash} backed up`)
+ }
+ } catch (error) {
+ if (error instanceof InvalidHistoryIdError) {
+ errors.invalidProjectId++
+ console.warn(`invalid historyId ${historyId}`)
+ continue
+ } else if (error instanceof MissingProjectError) {
+ errors.notInMongo++
+ console.warn(`✗ project ${historyId} not in mongo`)
+ continue
+ } else if (error instanceof MissingDEKError) {
+ errors.notBackedUpProjectId++
+ console.error(`✗ DEK for project ${historyId} not found`)
+ continue
+ } else if (error instanceof MissingBlobError) {
+ errors.missingBlob++
+ console.error(`✗ missing blob ${hash} from project ${historyId}`)
+ continue
+ }
+ errors.unknown++
+ console.error(error)
+ }
+}
+
+console.log(`total checked: ${total}`)
+console.log(`invalid project id: ${errors.invalidProjectId}`)
+console.log(`not found in mongo: ${errors.notInMongo}`)
+console.log(`missing blob: ${errors.missingBlob}`)
+console.log(`project not backed up: ${errors.notBackedUpProjectId}`)
+console.log(`unknown errors: ${errors.unknown}`)
+
+await client.close()
+await setTimeout(100)
+process.exit()
diff --git a/services/history-v1/storage/scripts/verify_project.mjs b/services/history-v1/storage/scripts/verify_project.mjs
new file mode 100644
index 0000000000..3c26f9b5da
--- /dev/null
+++ b/services/history-v1/storage/scripts/verify_project.mjs
@@ -0,0 +1,35 @@
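+// Verifies the backup of a single project.
+//
+// Usage:
+//   node storage/scripts/verify_project.mjs --historyId=<id>
+//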
+import commandLineArgs from 'command-line-args'
+import { verifyProjectWithErrorContext } from '../lib/backupVerifier.mjs'
+import knex from '../lib/knex.js'
+import { client } from '../lib/mongodb.js'
+import redis from '../lib/redis.js'
+import { setTimeout } from 'node:timers/promises'
+import { loadGlobalBlobs } from '../lib/blob_store/index.js'
+
+const { historyId } = commandLineArgs([{ name: 'historyId', type: String }])
+
+async function gracefulShutdown(code = process.exitCode) {
+ await knex.destroy()
+ await client.close()
+ await redis.disconnect()
+ await setTimeout(1_000)
+ process.exit(code)
+}
+
+if (!historyId) {
+ console.error('missing --historyId')
+ process.exitCode = 1
+ await gracefulShutdown()
+}
+
+await loadGlobalBlobs()
+
+try {
+ await verifyProjectWithErrorContext(historyId)
+ console.log('OK')
+} catch (error) {
+ console.error('error verifying', error)
+ process.exitCode = 1
+} finally {
+ await gracefulShutdown()
+}
diff --git a/services/history-v1/storage/scripts/verify_sampled_projects.mjs b/services/history-v1/storage/scripts/verify_sampled_projects.mjs
new file mode 100644
index 0000000000..a74a8b9798
--- /dev/null
+++ b/services/history-v1/storage/scripts/verify_sampled_projects.mjs
@@ -0,0 +1,217 @@
+// @ts-check
+import commandLineArgs from 'command-line-args'
+import {
+ setWriteMetrics,
+ verifyProjectsCreatedInDateRange,
+ verifyRandomProjectSample,
+ verifyProjectsUpdatedInDateRange,
+} from '../../backupVerifier/ProjectVerifier.mjs'
+import knex from '../lib/knex.js'
+import { client } from '../lib/mongodb.js'
+import { setTimeout } from 'node:timers/promises'
+import logger from '@overleaf/logger'
+import { loadGlobalBlobs } from '../lib/blob_store/index.js'
+import { getDatesBeforeRPO } from '../../backupVerifier/utils.mjs'
+import { EventEmitter } from 'node:events'
+import { mongodb } from '../index.js'
+import redis from '../lib/redis.js'
+
+logger.logger.level('fatal')
+
+const usageMessage = [
+ 'Usage: node verify_sampled_projects.mjs [--startDate <date>] [--endDate <date>] [--nProjects <n>] [--verbose] [--usage] [--writeMetrics] [--concurrency <n>] [--strategy <range|random|recent>]',
+ 'strategy: defaults to "range"; startDate and endDate are required for "range" strategy',
+].join('\n')
+
+/**
+ * Gracefully shutdown the process
+ * @param {number} [code]
+ * @return {Promise<void>}
+ */
+async function gracefulShutdown(code = process.exitCode) {
+ await knex.destroy()
+ await client.close()
+ await redis.disconnect()
+ await setTimeout(1_000)
+ process.exit(code)
+}
+
+const STATS = {
+ verifiable: 0,
+ unverifiable: 0,
+}
+
+/**
+ * @typedef {Object} CLIOptions
+ * @property {(signal: EventEmitter) => Promise<VerificationJobStatus>} projectVerifier
+ * @property {boolean} verbose
+ */
+
+/**
+ * @typedef {import('../../backupVerifier/types.d.ts').VerificationJobStatus} VerificationJobStatus
+ */
+
+/**
+ *
+ * @return {CLIOptions}
+ */
+function getOptions() {
+ const {
+ startDate,
+ endDate,
+ concurrency,
+ writeMetrics,
+ verbose,
+ nProjects,
+ strategy,
+ usage,
+ } = commandLineArgs([
+ { name: 'startDate', type: String },
+ { name: 'endDate', type: String },
+ { name: 'concurrency', type: Number, defaultValue: 1 },
+ { name: 'verbose', type: Boolean, defaultValue: false },
+ { name: 'nProjects', type: Number, defaultValue: 10 },
+ { name: 'usage', type: Boolean, defaultValue: false },
+ { name: 'writeMetrics', type: Boolean, defaultValue: false },
+ { name: 'strategy', type: String, defaultValue: 'range' },
+ ])
+
+ if (usage) {
+ console.log(usageMessage)
+ process.exit(0)
+ }
+
+ if (!['range', 'random', 'recent'].includes(strategy)) {
+ throw new Error(`Invalid strategy: ${strategy}`)
+ }
+
+ setWriteMetrics(writeMetrics)
+
+ switch (strategy) {
+ case 'random':
+ console.log('Verifying random projects')
+ return {
+ verbose,
+ projectVerifier: signal => verifyRandomProjectSample(nProjects, signal),
+ }
+ case 'recent':
+ return {
+ verbose,
+ projectVerifier: async signal => {
+ const { startDate, endDate } = getDatesBeforeRPO(3 * 3600)
+ return await verifyProjectsUpdatedInDateRange(
+ startDate,
+ endDate,
+ nProjects,
+ signal
+ )
+ },
+ }
+ case 'range':
+ default: {
+ if (!startDate || !endDate) {
+ throw new Error(usageMessage)
+ }
+ const start = Date.parse(startDate)
+ const end = Date.parse(endDate)
+ if (Number.isNaN(start)) {
+ throw new Error(`Invalid start date: ${startDate}`)
+ }
+
+ if (Number.isNaN(end)) {
+ throw new Error(`Invalid end date: ${endDate}`)
+ }
+ if (verbose) {
+ console.log(`Verifying from ${startDate} to ${endDate}`)
+ console.log(`Concurrency: ${concurrency}`)
+ }
+ STATS.ranges = 0
+ return {
+ projectVerifier: signal =>
+ verifyProjectsCreatedInDateRange({
+ startDate: new Date(start),
+ endDate: new Date(end),
+ projectsPerRange: nProjects,
+ concurrency,
+ signal,
+ }),
+ verbose,
+ }
+ }
+ }
+}
+
+/**
+ * @type {CLIOptions}
+ */
+let options
+try {
+ options = getOptions()
+} catch (error) {
+ console.error(error)
+ process.exitCode = 1
+ await gracefulShutdown(1)
+ process.exit() // just here so the type checker knows that the process will exit
+}
+
+const { projectVerifier, verbose } = options
+
+if (verbose) {
+ logger.logger.level('debug')
+}
+
+/**
+ *
+ * @param {Array} array
+ * @param {string} matchString
+ * @return {number}
+ */
+function sumStringInstances(array, matchString) {
+ return array.reduce((total, string) => {
+ return string === matchString ? total + 1 : total
+ }, 0)
+}
+
+/**
+ *
+ * @param {VerificationJobStatus} stats
+ */
+function displayStats(stats) {
+ console.log(`Verified projects: ${stats.verified}`)
+ console.log(`Total projects sampled: ${stats.total}`)
+ if (stats.errorTypes.length > 0) {
+ console.log('Errors:')
+ for (const error of new Set(stats.errorTypes)) {
+ console.log(`${error}: ${sumStringInstances(stats.errorTypes, error)}`)
+ }
+ }
+}
+
+const shutdownEmitter = new EventEmitter()
+
+shutdownEmitter.on('shutdown', async () => {
+ await gracefulShutdown()
+})
+
+process.on('SIGTERM', () => {
+ shutdownEmitter.emit('shutdown')
+})
+
+process.on('SIGINT', () => {
+ shutdownEmitter.emit('shutdown')
+})
+
+await loadGlobalBlobs()
+
+try {
+ const stats = await projectVerifier(shutdownEmitter)
+ displayStats(stats)
+ console.log(`completed`)
+} catch (error) {
+ console.error(error)
+ console.log('completed with errors')
+ process.exitCode = 1
+} finally {
+ console.log('shutting down')
+ await gracefulShutdown()
+}
diff --git a/services/history-v1/storage/tasks/fix_duplicate_versions.js b/services/history-v1/storage/tasks/fix_duplicate_versions.js
index a7db4b2765..ae9dcb4965 100755
--- a/services/history-v1/storage/tasks/fix_duplicate_versions.js
+++ b/services/history-v1/storage/tasks/fix_duplicate_versions.js
@@ -34,7 +34,7 @@ async function main() {
async function processProject(projectId, save) {
console.log(`Project ${projectId}:`)
- const chunk = await chunkStore.loadLatest(projectId)
+ const chunk = await chunkStore.loadLatest(projectId, { persistedOnly: true })
let numChanges = 0
numChanges += removeDuplicateProjectVersions(chunk)
numChanges += removeDuplicateDocVersions(chunk)
diff --git a/services/history-v1/test/acceptance/js/api/backupDeletion.test.mjs b/services/history-v1/test/acceptance/js/api/backupDeletion.test.mjs
index 8493f4d143..46512d1b92 100644
--- a/services/history-v1/test/acceptance/js/api/backupDeletion.test.mjs
+++ b/services/history-v1/test/acceptance/js/api/backupDeletion.test.mjs
@@ -50,7 +50,7 @@ async function deleteProject(projectId) {
}
/**
- * @param {string|ObjectId} historyId
+ * @param {number|ObjectId} historyId
* @return {Promise}
*/
async function expectToHaveBackup(historyId) {
@@ -61,7 +61,7 @@ async function expectToHaveBackup(historyId) {
}
/**
- * @param {string|ObjectId} historyId
+ * @param {number|ObjectId} historyId
* @return {Promise}
*/
async function expectToHaveNoBackup(historyId) {
@@ -90,7 +90,7 @@ describe('backupDeletion', function () {
})
describe('DELETE /project/:projectId', function () {
- const postgresHistoryId = '1'
+ const postgresHistoryId = 1
const projectIdPostgres = new ObjectId('000000000000000000000001')
const projectIdMongoDB = new ObjectId('000000000000000000000002')
const projectIdNonDeleted = new ObjectId('000000000000000000000003')
@@ -98,12 +98,6 @@ describe('backupDeletion', function () {
const projectIdWithChunks = new ObjectId('000000000000000000000005')
const projectIdNoHistoryId = new ObjectId('000000000000000000000006')
- beforeEach('cleanup s3 buckets', async function () {
- await backupPersistor.deleteDirectory(deksBucket, '')
- await backupPersistor.deleteDirectory(chunksBucket, '')
- await backupPersistor.deleteDirectory(projectBlobsBucket, '')
- })
-
beforeEach('populate mongo', async function () {
await deletedProjectsCollection.insertMany([
{
diff --git a/services/history-v1/test/acceptance/js/api/backupVerifier.test.mjs b/services/history-v1/test/acceptance/js/api/backupVerifier.test.mjs
index af041b0d7a..0a1fa528ab 100644
--- a/services/history-v1/test/acceptance/js/api/backupVerifier.test.mjs
+++ b/services/history-v1/test/acceptance/js/api/backupVerifier.test.mjs
@@ -6,29 +6,75 @@ import { expect } from 'chai'
import testProjects from './support/test_projects.js'
import {
backupPersistor,
+ chunksBucket,
projectBlobsBucket,
} from '../../../../storage/lib/backupPersistor.mjs'
import {
BlobStore,
makeProjectKey,
} from '../../../../storage/lib/blob_store/index.js'
-import Stream from 'stream'
+import Stream from 'node:stream'
import * as zlib from 'node:zlib'
import { promisify } from 'node:util'
import { execFile } from 'node:child_process'
import { NotFoundError } from '@overleaf/object-persistor/src/Errors.js'
+import { chunkStore } from '../../../../storage/index.js'
+import { Change, File, Operation } from 'overleaf-editor-core'
+import Crypto from 'node:crypto'
+import path from 'node:path'
+import projectKey from '../../../../storage/lib/project_key.js'
+import { historyStore } from '../../../../storage/lib/history_store.js'
/**
* @typedef {import("node-fetch").Response} Response
* @typedef {import("overleaf-editor-core").Blob} Blob
*/
+// Timeout for script execution, increased to avoid flaky tests
+const SCRIPT_TIMEOUT = 15_000
+
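+/**
+ * Run storage/scripts/verify_project.mjs for a project and normalise the
+ * result. When expectFail is false, unexpected failures are logged in full.
+ *
+ * @param {string} historyId
+ * @param {boolean} [expectFail]
+ * @return {Promise<{status: number, stdout: string, stderr: string}>}
+ */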
+async function verifyProjectScript(historyId, expectFail = true) {
+ try {
+ const result = await promisify(execFile)(
+ process.argv0,
+ ['storage/scripts/verify_project.mjs', `--historyId=${historyId}`],
+ {
+ encoding: 'utf-8',
+ timeout: SCRIPT_TIMEOUT,
+ env: {
+ ...process.env,
+ LOG_LEVEL: 'warn',
+ },
+ }
+ )
+ return { status: 0, stdout: result.stdout, stderr: result.stderr }
+ } catch (err) {
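+ // execFile rejects with an error carrying code/stdout/stderr when the script
+ // exits non-zero; normalise it into the same result shape as the success path.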
+ if (
+ err &&
+ typeof err === 'object' &&
+ 'stdout' in err &&
+ 'code' in err &&
+ 'stderr' in err
+ ) {
+ if (!expectFail) {
+ console.log(err)
+ }
+ return {
+ stdout: typeof err.stdout === 'string' ? err.stdout : '',
+ status: typeof err.code === 'number' ? err.code : -1,
+ stderr: typeof err.stderr === 'string' ? err.stderr : '',
+ }
+ }
+ throw err
+ }
+}
+
/**
* @param {string} historyId
* @param {string} hash
* @return {Promise<{stdout: string, status:number }>}
*/
-async function verifyBlobScript(historyId, hash) {
+async function verifyBlobScript(historyId, hash, expectFail = true) {
try {
const result = await promisify(execFile)(
process.argv0,
@@ -39,13 +85,19 @@ async function verifyBlobScript(historyId, hash) {
],
{
encoding: 'utf-8',
- timeout: 5_000,
- env: process.env,
+ timeout: SCRIPT_TIMEOUT,
+ env: {
+ ...process.env,
+ LOG_LEVEL: 'warn',
+ },
}
)
return { status: 0, stdout: result.stdout }
} catch (err) {
if (err && typeof err === 'object' && 'stdout' in err && 'code' in err) {
+ if (!expectFail) {
+ console.log(err)
+ }
return {
stdout: typeof err.stdout === 'string' ? err.stdout : '',
status: typeof err.code === 'number' ? err.code : -1,
@@ -66,22 +118,84 @@ async function verifyBlobHTTP(historyId, hash) {
)
}
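+// Copy the project's latest raw chunk into the backup chunks bucket so the
+// verifier sees the chunk as backed up; the md5 digest lets the persistor
+// validate the upload.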
+async function backupChunk(historyId) {
+ const newChunkMetadata = await chunkStore.getLatestChunkMetadata(historyId)
+ const { buffer: chunkBuffer } = await historyStore.loadRawWithBuffer(
+ historyId,
+ newChunkMetadata.id
+ )
+ const md5 = Crypto.createHash('md5').update(chunkBuffer)
+ await backupPersistor.sendStream(
+ chunksBucket,
+ path.join(
+ projectKey.format(historyId),
+ projectKey.pad(newChunkMetadata.startVersion)
+ ),
+ Stream.Readable.from([chunkBuffer]),
+ {
+ contentType: 'application/json',
+ contentEncoding: 'gzip',
+ contentLength: chunkBuffer.byteLength,
+ sourceMd5: md5.digest('hex'),
+ }
+ )
+}
+
+const FIFTEEN_MINUTES_IN_MS = 900_000
+
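+// Append a change to the latest persisted chunk of a project. The tests pass a
+// creationDate in the past (see FIFTEEN_MINUTES_IN_MS above) so that a chunk
+// which has not been backed up should register as an RPO violation.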
+async function addFileInNewChunk(
+ fileContents,
+ filePath,
+ historyId,
+ { creationDate = new Date() } = {}
+) {
+ const chunk = await chunkStore.loadLatest(historyId, { persistedOnly: true })
+ const operation = Operation.addFile(
+ filePath,
+ File.fromString(fileContents)
+ )
+ const changes = [new Change([operation], creationDate, [])]
+ chunk.pushChanges(changes)
+ await chunkStore.update(historyId, chunk)
+}
+
/**
* @param {string} historyId
+ * @param {Object} [options]
+ * @param {boolean} [options.shouldBackupBlob]
+ * @param {boolean} [options.shouldBackupChunk]
+ * @param {boolean} [options.shouldCreateChunk]
* @return {Promise}
*/
-async function prepareProjectAndBlob(historyId) {
+async function prepareProjectAndBlob(
+ historyId,
+ { shouldBackupBlob, shouldBackupChunk, shouldCreateChunk } = {
+ shouldBackupBlob: true,
+ shouldBackupChunk: true,
+ shouldCreateChunk: true,
+ }
+) {
await testProjects.createEmptyProject(historyId)
const blobStore = new BlobStore(historyId)
- const blob = await blobStore.putString(historyId)
- const gzipped = zlib.gzipSync(Buffer.from(historyId))
- await backupPersistor.sendStream(
- projectBlobsBucket,
- makeProjectKey(historyId, blob.getHash()),
- Stream.Readable.from([gzipped]),
- { contentLength: gzipped.byteLength, contentEncoding: 'gzip' }
- )
- await checkDEKExists(historyId)
+ const fileContents = historyId
+ const blob = await blobStore.putString(fileContents)
+ if (shouldCreateChunk) {
+ await addFileInNewChunk(fileContents, `${historyId}.txt`, historyId, {
+ creationDate: new Date(new Date().getTime() - FIFTEEN_MINUTES_IN_MS),
+ })
+ }
+
+ if (shouldBackupBlob) {
+ const gzipped = zlib.gzipSync(Buffer.from(historyId))
+ await backupPersistor.sendStream(
+ projectBlobsBucket,
+ makeProjectKey(historyId, blob.getHash()),
+ Stream.Readable.from([gzipped]),
+ { contentLength: gzipped.byteLength, contentEncoding: 'gzip' }
+ )
+ await checkDEKExists(historyId)
+ }
+ if (shouldCreateChunk && shouldBackupChunk) {
+ await backupChunk(historyId)
+ }
+
return blob.getHash()
}
@@ -97,6 +211,7 @@ async function checkDEKExists(historyId) {
}
describe('backupVerifier', function () {
+ this.timeout(5_000 + SCRIPT_TIMEOUT) // allow time for external scripts to run
const historyIdPostgres = '42'
const historyIdMongo = '000000000000000000000042'
let blobHashPG, blobHashMongo, blobPathPG
@@ -120,6 +235,53 @@ describe('backupVerifier', function () {
const response = await fetch(testServer.testUrl('/health_check'))
expect(response.status).to.equal(200)
})
+ describe('storage/scripts/verify_project.mjs', function () {
+ describe('when the project is appropriately backed up', function () {
+ it('should return 0', async function () {
+ const response = await verifyProjectScript(historyIdPostgres, false)
+ expect(response.status).to.equal(0)
+ })
+ })
+ describe('when the project chunk is not backed up', function () {
+ let response
+ beforeEach(async function () {
+ await prepareProjectAndBlob('000000000000000000000043', {
+ shouldBackupChunk: false,
+ shouldBackupBlob: true,
+ shouldCreateChunk: true,
+ })
+ response = await verifyProjectScript('000000000000000000000043')
+ })
+ it('should return 1', async function () {
+ expect(response.status).to.equal(1)
+ })
+ it('should emit an error message referring to a missing chunk', async function () {
+ const stderr = response.stderr
+ expect(stderr).to.include('BackupRPOViolationChunkNotBackedUpError')
+ })
+ })
+ describe('when a project blob is not backed up', function () {
+ let response
+ beforeEach(async function () {
+ await prepareProjectAndBlob('43', {
+ shouldBackupChunk: true,
+ shouldBackupBlob: false,
+ shouldCreateChunk: true,
+ })
+ response = await verifyProjectScript('43')
+ })
+
+ it('should return 1', function () {
+ expect(response.status).to.equal(1)
+ })
+
+ it('includes a BackupCorruptedError in stderr', function () {
+ expect(response.stderr).to.include(
+ 'BackupCorruptedMissingBlobError: missing blob'
+ )
+ })
+ })
+ })
describe('storage/scripts/verify_backup_blob.mjs', function () {
it('throws and does not create DEK if missing', async function () {
const historyId = '404'
@@ -154,12 +316,20 @@ describe('backupVerifier', function () {
expect(result.stdout).to.include('hash mismatch for backed up blob')
})
it('should successfully verify from postgres', async function () {
- const result = await verifyBlobScript(historyIdPostgres, blobHashPG)
+ const result = await verifyBlobScript(
+ historyIdPostgres,
+ blobHashPG,
+ false
+ )
expect(result.status).to.equal(0)
expect(result.stdout.split('\n')).to.include('OK')
})
it('should successfully verify from mongo', async function () {
- const result = await verifyBlobScript(historyIdMongo, blobHashMongo)
+ const result = await verifyBlobScript(
+ historyIdMongo,
+ blobHashMongo,
+ false
+ )
expect(result.status).to.equal(0)
expect(result.stdout.split('\n')).to.include('OK')
})
diff --git a/services/history-v1/test/acceptance/js/api/project_expiry.test.js b/services/history-v1/test/acceptance/js/api/project_expiry.test.js
new file mode 100644
index 0000000000..efa589ec71
--- /dev/null
+++ b/services/history-v1/test/acceptance/js/api/project_expiry.test.js
@@ -0,0 +1,82 @@
+'use strict'
+
+const BPromise = require('bluebird')
+const { expect } = require('chai')
+const HTTPStatus = require('http-status')
+const fetch = require('node-fetch')
+const fs = BPromise.promisifyAll(require('node:fs'))
+
+const cleanup = require('../storage/support/cleanup')
+const fixtures = require('../storage/support/fixtures')
+const testFiles = require('../storage/support/test_files')
+const testProjects = require('./support/test_projects')
+const testServer = require('./support/test_server')
+
+const { Change, File, Operation } = require('overleaf-editor-core')
+const queueChanges = require('../../../../storage/lib/queue_changes')
+const { getState } = require('../../../../storage/lib/chunk_store/redis')
+
+describe('project expiry', function () {
+ beforeEach(cleanup.everything)
+ beforeEach(fixtures.create)
+
+ it('expires the redis buffer', async function () {
+ const basicAuthClient = testServer.basicAuthClient
+ const projectId = await testProjects.createEmptyProject()
+
+ // upload an empty file
+ const response = await fetch(
+ testServer.url(
+ `/api/projects/${projectId}/blobs/${File.EMPTY_FILE_HASH}`,
+ { qs: { pathname: 'main.tex' } }
+ ),
+ {
+ method: 'PUT',
+ body: fs.createReadStream(testFiles.path('empty.tex')),
+ headers: {
+ Authorization: testServer.basicAuthHeader,
+ },
+ }
+ )
+ expect(response.ok).to.be.true
+
+ const testFile = File.fromHash(File.EMPTY_FILE_HASH)
+ const testChange = new Change(
+ [Operation.addFile('main.tex', testFile)],
+ new Date()
+ )
+ await queueChanges(projectId, [testChange], 0)
+
+ // Verify that the changes are queued and not yet persisted
+ const initialState = await getState(projectId)
+ expect(initialState.persistedVersion).to.be.null
+ expect(initialState.changes).to.have.lengthOf(1)
+
+ const importResponse =
+ await basicAuthClient.apis.ProjectImport.flushChanges({
+ project_id: projectId,
+ })
+
+ expect(importResponse.status).to.equal(HTTPStatus.OK)
+
+ // Verify that the changes were persisted to the chunk store
+ const flushedState = await getState(projectId)
+ expect(flushedState.persistedVersion).to.equal(1)
+
+ const expireResponse =
+ await basicAuthClient.apis.ProjectImport.expireProject({
+ project_id: projectId,
+ })
+ expect(expireResponse.status).to.equal(HTTPStatus.OK)
+
+ const finalState = await getState(projectId)
+ expect(finalState).to.deep.equal({
+ changes: [],
+ expireTime: null,
+ headSnapshot: null,
+ headVersion: null,
+ persistTime: null,
+ persistedVersion: null,
+ })
+ })
+})
diff --git a/services/history-v1/test/acceptance/js/api/project_flush.test.js b/services/history-v1/test/acceptance/js/api/project_flush.test.js
new file mode 100644
index 0000000000..f8d0b23d8e
--- /dev/null
+++ b/services/history-v1/test/acceptance/js/api/project_flush.test.js
@@ -0,0 +1,66 @@
+'use strict'
+
+const BPromise = require('bluebird')
+const { expect } = require('chai')
+const HTTPStatus = require('http-status')
+const fetch = require('node-fetch')
+const fs = BPromise.promisifyAll(require('node:fs'))
+
+const cleanup = require('../storage/support/cleanup')
+const fixtures = require('../storage/support/fixtures')
+const testFiles = require('../storage/support/test_files')
+const testProjects = require('./support/test_projects')
+const testServer = require('./support/test_server')
+
+const { Change, File, Operation } = require('overleaf-editor-core')
+const queueChanges = require('../../../../storage/lib/queue_changes')
+const { getState } = require('../../../../storage/lib/chunk_store/redis')
+
+describe('project flush', function () {
+ beforeEach(cleanup.everything)
+ beforeEach(fixtures.create)
+
+ it('persists queued changes to the chunk store', async function () {
+ const basicAuthClient = testServer.basicAuthClient
+ const projectId = await testProjects.createEmptyProject()
+
+ // upload an empty file
+ const response = await fetch(
+ testServer.url(
+ `/api/projects/${projectId}/blobs/${File.EMPTY_FILE_HASH}`,
+ { qs: { pathname: 'main.tex' } }
+ ),
+ {
+ method: 'PUT',
+ body: fs.createReadStream(testFiles.path('empty.tex')),
+ headers: {
+ Authorization: testServer.basicAuthHeader,
+ },
+ }
+ )
+ expect(response.ok).to.be.true
+
+ const testFile = File.fromHash(File.EMPTY_FILE_HASH)
+ const testChange = new Change(
+ [Operation.addFile('main.tex', testFile)],
+ new Date()
+ )
+ await queueChanges(projectId, [testChange], 0)
+
+ // Verify that the changes are queued and not yet persisted
+ const initialState = await getState(projectId)
+ expect(initialState.persistedVersion).to.be.null
+ expect(initialState.changes).to.have.lengthOf(1)
+
+ const importResponse =
+ await basicAuthClient.apis.ProjectImport.flushChanges({
+ project_id: projectId,
+ })
+
+ expect(importResponse.status).to.equal(HTTPStatus.OK)
+
+ // Verify that the changes were persisted to the chunk store
+ const finalState = await getState(projectId)
+ expect(finalState.persistedVersion).to.equal(1)
+ })
+})
diff --git a/services/history-v1/test/acceptance/js/api/project_import.test.js b/services/history-v1/test/acceptance/js/api/project_import.test.js
index 216fb527fa..fb173238f8 100644
--- a/services/history-v1/test/acceptance/js/api/project_import.test.js
+++ b/services/history-v1/test/acceptance/js/api/project_import.test.js
@@ -52,6 +52,6 @@ describe('project import', function () {
})
expect(importResponse.status).to.equal(HTTPStatus.CREATED)
- expect(importResponse.obj).to.deep.equal({})
+ expect(importResponse.obj).to.deep.equal({ resyncNeeded: false })
})
})
diff --git a/services/history-v1/test/acceptance/js/api/project_updates.test.js b/services/history-v1/test/acceptance/js/api/project_updates.test.js
index d67000245a..f50f3677b5 100644
--- a/services/history-v1/test/acceptance/js/api/project_updates.test.js
+++ b/services/history-v1/test/acceptance/js/api/project_updates.test.js
@@ -580,7 +580,7 @@ describe('history import', function () {
.catch(expectResponse.unprocessableEntity)
.then(getLatestContent)
.then(response => {
- // Check that no chaes were made
+ // Check that no changes were made
const snapshot = Snapshot.fromRaw(response.obj)
expect(snapshot.countFiles()).to.equal(1)
expect(snapshot.getFile(mainFilePathname).getHash()).to.equal(
diff --git a/services/history-v1/test/acceptance/js/api/projects.test.js b/services/history-v1/test/acceptance/js/api/projects.test.js
index 6358f8505d..22220ae8bd 100644
--- a/services/history-v1/test/acceptance/js/api/projects.test.js
+++ b/services/history-v1/test/acceptance/js/api/projects.test.js
@@ -10,7 +10,12 @@ const cleanup = require('../storage/support/cleanup')
const fixtures = require('../storage/support/fixtures')
const testFiles = require('../storage/support/test_files')
-const { zipStore, persistChanges } = require('../../../../storage')
+const {
+ zipStore,
+ BlobStore,
+ persistChanges,
+ redisBuffer,
+} = require('../../../../storage')
const { expectHttpError } = require('./support/expect_response')
const testServer = require('./support/test_server')
@@ -21,6 +26,8 @@ const {
Snapshot,
Change,
AddFileOperation,
+ EditFileOperation,
+ TextOperation,
} = require('overleaf-editor-core')
const testProjects = require('./support/test_projects')
@@ -103,40 +110,195 @@ describe('project controller', function () {
// https://github.com/overleaf/write_latex/pull/5120#discussion_r244291862
})
- describe('getLatestHashedContent', function () {
- let limitsToPersistImmediately
+ describe('project with changes', function () {
+ let projectId
- before(function () {
+ beforeEach(async function () {
// used to provide a limit which forces us to persist all of the changes.
const farFuture = new Date()
farFuture.setTime(farFuture.getTime() + 7 * 24 * 3600 * 1000)
- limitsToPersistImmediately = {
+ const limits = {
minChangeTimestamp: farFuture,
maxChangeTimestamp: farFuture,
}
- })
-
- it('returns a snaphot', async function () {
const changes = [
new Change(
[new AddFileOperation('test.tex', File.fromString('ab'))],
new Date(),
[]
),
+ new Change(
+ [new AddFileOperation('other.tex', File.fromString('hello'))],
+ new Date(),
+ []
+ ),
]
- const projectId = await createEmptyProject()
- await persistChanges(projectId, changes, limitsToPersistImmediately, 0)
- const response =
- await testServer.basicAuthClient.apis.Project.getLatestHashedContent({
- project_id: projectId,
+ projectId = await createEmptyProject()
+ await persistChanges(projectId, changes, limits, 0)
+ })
+
+ describe('getLatestHashedContent', function () {
+ it('returns a snapshot', async function () {
+ const response =
+ await testServer.basicAuthClient.apis.Project.getLatestHashedContent({
+ project_id: projectId,
+ })
+ expect(response.status).to.equal(HTTPStatus.OK)
+ const snapshot = Snapshot.fromRaw(response.obj)
+ expect(snapshot.countFiles()).to.equal(2)
+ expect(snapshot.getFile('test.tex').getHash()).to.equal(
+ testFiles.STRING_AB_HASH
+ )
+ })
+ })
+
+ describe('getChanges', function () {
+ it('returns all changes when not given a limit', async function () {
+ const response =
+ await testServer.basicAuthClient.apis.Project.getChanges({
+ project_id: projectId,
+ })
+ expect(response.status).to.equal(HTTPStatus.OK)
+ const { changes, hasMore } = response.obj
+ expect(changes.length).to.equal(2)
+ const filenames = changes
+ .flatMap(change => change.operations)
+ .map(operation => operation.pathname)
+ expect(filenames).to.deep.equal(['test.tex', 'other.tex'])
+ expect(hasMore).to.be.false
+ })
+
+ it('returns only requested changes', async function () {
+ const response =
+ await testServer.basicAuthClient.apis.Project.getChanges({
+ project_id: projectId,
+ since: 1,
+ })
+ expect(response.status).to.equal(HTTPStatus.OK)
+ const { changes, hasMore } = response.obj
+ expect(changes.length).to.equal(1)
+ const filenames = changes
+ .flatMap(change => change.operations)
+ .map(operation => operation.pathname)
+ expect(filenames).to.deep.equal(['other.tex'])
+ expect(hasMore).to.be.false
+ })
+
+ it('rejects negative versions', async function () {
+ await expect(
+ testServer.basicAuthClient.apis.Project.getChanges({
+ project_id: projectId,
+ since: -1,
+ })
+ ).to.be.rejectedWith('Bad Request')
+ })
+
+ it('rejects out of bounds versions', async function () {
+ await expect(
+ testServer.basicAuthClient.apis.Project.getChanges({
+ project_id: projectId,
+ since: 20,
+ })
+ ).to.be.rejectedWith('Bad Request')
+ })
+ })
+
+ describe('project with many chunks', function () {
+ let projectId, changes
+
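+ // With maxChunkChanges set to 5, the 21 changes below span several chunks,
+ // so getChanges returns at most one chunk's worth of changes per call and
+ // signals the remainder via hasMore.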
+ beforeEach(async function () {
+ // used to provide a limit which forces us to persist all of the changes.
+ const farFuture = new Date()
+ farFuture.setTime(farFuture.getTime() + 7 * 24 * 3600 * 1000)
+ const limits = {
+ minChangeTimestamp: farFuture,
+ maxChangeTimestamp: farFuture,
+ maxChunkChanges: 5,
+ }
+ projectId = await createEmptyProject()
+ const blobStore = new BlobStore(projectId)
+ const blob = await blobStore.putString('')
+ changes = [
+ new Change(
+ [new AddFileOperation('test.tex', File.createLazyFromBlobs(blob))],
+ new Date(),
+ []
+ ),
+ ]
+
+ for (let i = 0; i < 20; i++) {
+ const textOperation = new TextOperation()
+ textOperation.retain(i)
+ textOperation.insert('x')
+ changes.push(
+ new Change(
+ [new EditFileOperation('test.tex', textOperation)],
+ new Date(),
+ []
+ )
+ )
+ }
+
+ await persistChanges(projectId, changes, limits, 0)
+ })
+
+ it('returns the first chunk when not given a limit', async function () {
+ const response =
+ await testServer.basicAuthClient.apis.Project.getChanges({
+ project_id: projectId,
+ })
+
+ expect(response.status).to.equal(HTTPStatus.OK)
+ expect(response.obj).to.deep.equal({
+ changes: changes.slice(0, 5).map(c => c.toRaw()),
+ hasMore: true,
})
- expect(response.status).to.equal(HTTPStatus.OK)
- const snapshot = Snapshot.fromRaw(response.obj)
- expect(snapshot.countFiles()).to.equal(1)
- expect(snapshot.getFile('test.tex').getHash()).to.equal(
- testFiles.STRING_AB_HASH
- )
+ })
+
+ it('returns only requested changes', async function () {
+ const response =
+ await testServer.basicAuthClient.apis.Project.getChanges({
+ project_id: projectId,
+ since: 12,
+ })
+ expect(response.status).to.equal(HTTPStatus.OK)
+ expect(response.obj).to.deep.equal({
+ changes: changes.slice(12, 15).map(c => c.toRaw()),
+ hasMore: true,
+ })
+ })
+
+ it('returns changes in the latest chunk', async function () {
+ const response =
+ await testServer.basicAuthClient.apis.Project.getChanges({
+ project_id: projectId,
+ since: 20,
+ })
+ expect(response.status).to.equal(HTTPStatus.OK)
+ expect(response.obj).to.deep.equal({
+ changes: changes.slice(20).map(c => c.toRaw()),
+ hasMore: false,
+ })
+ })
+ })
+ })
+
+ describe('getLatestHistoryRaw', function () {
+ it('should handle reads', async function () {
+ const projectId = fixtures.docs.initializedProject.id
+ const response =
+ await testServer.pseudoJwtBasicAuthClient.apis.Project.getLatestHistoryRaw(
+ {
+ project_id: projectId,
+ readOnly: 'true',
+ }
+ )
+ expect(response.body).to.deep.equal({
+ startVersion: 0,
+ endVersion: 1,
+ endTimestamp: '2032-01-01T00:00:00.000Z',
+ })
})
})
@@ -190,5 +352,60 @@ describe('project controller', function () {
const response3 = await fetch(blobUrl, { headers: authHeaders })
expect(response3.status).to.equal(HTTPStatus.NOT_FOUND)
})
+
+ it('deletes the project from the redis buffer', async function () {
+ const projectId = await createEmptyProject()
+ const blobStore = new BlobStore(projectId)
+ const blob = await blobStore.putString('this is a test')
+ const snapshot = new Snapshot()
+ const change = new Change(
+ [new AddFileOperation('test.tex', File.createLazyFromBlobs(blob))],
+ new Date(),
+ []
+ )
+
+ await redisBuffer.queueChanges(projectId, snapshot, 0, [change])
+ const changesBefore = await redisBuffer.getNonPersistedChanges(
+ projectId,
+ 0
+ )
+ expect(changesBefore.length).to.equal(1)
+
+ const deleteResponse =
+ await testServer.basicAuthClient.apis.Project.deleteProject({
+ project_id: projectId,
+ })
+ expect(deleteResponse.status).to.equal(HTTPStatus.NO_CONTENT)
+
+ const changesAfter = await redisBuffer.getNonPersistedChanges(
+ projectId,
+ 0
+ )
+ expect(changesAfter.length).to.equal(0)
+
+ const finalState = await redisBuffer.getState(projectId)
+ expect(finalState).to.deep.equal({
+ changes: [],
+ expireTime: null,
+ headSnapshot: null,
+ headVersion: null,
+ persistTime: null,
+ persistedVersion: null,
+ })
+ })
+
+ it('deletes an empty project from the redis buffer', async function () {
+ const projectId = await createEmptyProject()
+ const deleteResponse =
+ await testServer.basicAuthClient.apis.Project.deleteProject({
+ project_id: projectId,
+ })
+ expect(deleteResponse.status).to.equal(HTTPStatus.NO_CONTENT)
+ const changesAfter = await redisBuffer.getNonPersistedChanges(
+ projectId,
+ 0
+ )
+ expect(changesAfter.length).to.equal(0)
+ })
})
})
diff --git a/services/history-v1/test/acceptance/js/api/rollout.test.js b/services/history-v1/test/acceptance/js/api/rollout.test.js
new file mode 100644
index 0000000000..f1a65e5aff
--- /dev/null
+++ b/services/history-v1/test/acceptance/js/api/rollout.test.js
@@ -0,0 +1,115 @@
+const config = require('config')
+const sinon = require('sinon')
+const { expect } = require('chai')
+
+const cleanup = require('../storage/support/cleanup')
+const Rollout = require('../../../../api/app/rollout')
+
+describe('rollout', function () {
+ beforeEach(cleanup.everything)
+ beforeEach('Set up stubs', function () {
+ sinon.stub(config, 'has').callThrough()
+ sinon.stub(config, 'get').callThrough()
+ })
+ afterEach(sinon.restore)
+
+ it('should return a valid history buffer level', function () {
+ setMockConfig('historyBufferLevel', '2')
+ setMockConfig('forcePersistBuffer', 'false')
+
+ const rollout = new Rollout(config)
+ const { historyBufferLevel, forcePersistBuffer } =
+ rollout.getHistoryBufferLevelOptions('test-project-id')
+ expect(historyBufferLevel).to.equal(2)
+ expect(forcePersistBuffer).to.be.false
+ })
+
+ it('should return a valid history buffer level and force persist buffer options', function () {
+ setMockConfig('historyBufferLevel', '1')
+ setMockConfig('forcePersistBuffer', 'true')
+ const rollout = new Rollout(config)
+ const { historyBufferLevel, forcePersistBuffer } =
+ rollout.getHistoryBufferLevelOptions('test-project-id')
+ expect(historyBufferLevel).to.equal(1)
+ expect(forcePersistBuffer).to.be.true
+ })
+
+ describe('with a higher next history buffer level rollout', function () {
+ beforeEach(function () {
+ setMockConfig('historyBufferLevel', '2')
+ setMockConfig('forcePersistBuffer', 'false')
+ setMockConfig('nextHistoryBufferLevel', '3')
+ })
+ it('should return the expected history buffer level when the rollout percentage is zero', function () {
+ setMockConfig('nextHistoryBufferLevelRolloutPercentage', '0')
+ const rollout = new Rollout(config)
+ for (let i = 0; i < 1000; i++) {
+ const { historyBufferLevel, forcePersistBuffer } =
+ rollout.getHistoryBufferLevelOptions(`test-project-id-${i}`)
+ expect(historyBufferLevel).to.equal(2)
+ expect(forcePersistBuffer).to.be.false
+ }
+ })
+
+ it('should return the expected distribution of levels when the rollout percentage is 10%', function () {
+ setMockConfig('nextHistoryBufferLevelRolloutPercentage', '10')
+ const rollout = new Rollout(config)
+ let currentLevel = 0
+ let nextLevel = 0
+ for (let i = 0; i < 1000; i++) {
+ const { historyBufferLevel } = rollout.getHistoryBufferLevelOptions(
+ `test-project-id-${i}`
+ )
+ switch (historyBufferLevel) {
+ case 2:
+ currentLevel++
+ break
+ case 3:
+ nextLevel++
+ break
+ default:
+ expect.fail(
+ `Unexpected history buffer level: ${historyBufferLevel}`
+ )
+ }
+ }
+ const twoPercentage = (currentLevel / 1000) * 100
+ const threePercentage = (nextLevel / 1000) * 100
+ expect(twoPercentage).to.be.closeTo(90, 5) // 90% for level 2
+ expect(threePercentage).to.be.closeTo(10, 5) // 10% for level 3
+ })
+ })
+ describe('with a next history buffer level lower than the current level', function () {
+ beforeEach(function () {
+ setMockConfig('historyBufferLevel', '3')
+ setMockConfig('forcePersistBuffer', 'false')
+ setMockConfig('nextHistoryBufferLevel', '2')
+ })
+ it('should always return the current level when the rollout percentage is zero', function () {
+ setMockConfig('nextHistoryBufferLevelRolloutPercentage', '0')
+ const rollout = new Rollout(config)
+ for (let i = 0; i < 1000; i++) {
+ const { historyBufferLevel, forcePersistBuffer } =
+ rollout.getHistoryBufferLevelOptions(`test-project-id-${i}`)
+ expect(historyBufferLevel).to.equal(3)
+ expect(forcePersistBuffer).to.be.false
+ }
+ })
+
+ it('should always return the current level regardless of the rollout percentage', function () {
+ setMockConfig('nextHistoryBufferLevelRolloutPercentage', '10')
+ const rollout = new Rollout(config)
+ for (let i = 0; i < 1000; i++) {
+ const { historyBufferLevel } = rollout.getHistoryBufferLevelOptions(
+ `test-project-id-${i}`
+ )
+ expect(historyBufferLevel).to.equal(3)
+ }
+ })
+ })
+})
+
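+// Stub config.has/config.get for a single key; other keys fall through to the
+// real config because the stubs are created with callThrough() in beforeEach.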
+function setMockConfig(path, value) {
+ config.has.withArgs(path).returns(true)
+ config.get.withArgs(path).returns(value)
+}
diff --git a/services/history-v1/test/acceptance/js/api/support/test_backup_verifier_server.mjs b/services/history-v1/test/acceptance/js/api/support/test_backup_verifier_server.mjs
index 10d6dbc6c1..57a805e334 100644
--- a/services/history-v1/test/acceptance/js/api/support/test_backup_verifier_server.mjs
+++ b/services/history-v1/test/acceptance/js/api/support/test_backup_verifier_server.mjs
@@ -26,7 +26,7 @@ async function listenOnRandomPort() {
return
} catch {}
}
- server = await startApp(0)
+ server = await startApp(0, false)
}
after('close server', function (done) {
diff --git a/services/history-v1/test/acceptance/js/storage/assert.test.js b/services/history-v1/test/acceptance/js/storage/assert.test.js
new file mode 100644
index 0000000000..6ba30e2562
--- /dev/null
+++ b/services/history-v1/test/acceptance/js/storage/assert.test.js
@@ -0,0 +1,248 @@
+'use strict'
+
+const OError = require('@overleaf/o-error')
+const { expect } = require('chai')
+const assert = require('../../../../storage/lib/assert')
+
+describe('assert', function () {
+ describe('blobHash', function () {
+ it('should not throw for valid blob hashes', function () {
+ expect(() =>
+ assert.blobHash(
+ 'aad321caf77ca6c5ab09e6c638c237705f93b001',
+ 'should be a blob hash'
+ )
+ ).to.not.throw()
+ })
+
+ it('should throw for invalid blob hashes', function () {
+ try {
+ assert.blobHash('invalid-hash', 'should be a blob hash')
+ expect.fail()
+ } catch (error) {
+ expect(error).to.be.instanceOf(TypeError)
+ expect(error.message).to.equal('should be a blob hash')
+ expect(OError.getFullInfo(error)).to.deep.equal({ arg: 'invalid-hash' })
+ }
+ })
+
+ it('should throw for string integer blob hashes', function () {
+ try {
+ assert.blobHash('123', 'should be a blob hash')
+ expect.fail()
+ } catch (error) {
+ expect(error).to.be.instanceOf(TypeError)
+ expect(error.message).to.equal('should be a blob hash')
+ expect(OError.getFullInfo(error)).to.deep.equal({ arg: '123' })
+ }
+ })
+ })
+
+ describe('projectId', function () {
+ it('should not throw for valid mongo project ids', function () {
+ expect(() =>
+ assert.projectId('507f1f77bcf86cd799439011', 'should be a project id')
+ ).to.not.throw()
+ })
+
+ it('should not throw for valid postgres project ids', function () {
+ expect(() =>
+ assert.projectId('123456789', 'should be a project id')
+ ).to.not.throw()
+ })
+
+ it('should throw for invalid project ids', function () {
+ try {
+ assert.projectId('invalid-id', 'should be a project id')
+ expect.fail()
+ } catch (error) {
+ expect(error).to.be.instanceOf(TypeError)
+ expect(error.message).to.equal('should be a project id')
+ expect(OError.getFullInfo(error)).to.deep.equal({ arg: 'invalid-id' })
+ }
+ })
+
+ it('should throw for non-numeric project ids', function () {
+ try {
+ assert.projectId('12345x', 'should be a project id')
+ expect.fail()
+ } catch (error) {
+ expect(error).to.be.instanceOf(TypeError)
+ expect(error.message).to.equal('should be a project id')
+ expect(OError.getFullInfo(error)).to.deep.equal({ arg: '12345x' })
+ }
+ })
+
+ it('should throw for postgres ids starting with 0', function () {
+ try {
+ assert.projectId('0123456', 'should be a project id')
+ expect.fail()
+ } catch (error) {
+ expect(error).to.be.instanceOf(TypeError)
+ expect(error.message).to.equal('should be a project id')
+ expect(OError.getFullInfo(error)).to.deep.equal({ arg: '0123456' })
+ }
+ })
+ })
+
+ describe('chunkId', function () {
+ it('should not throw for valid mongo chunk ids', function () {
+ expect(() =>
+ assert.chunkId('507f1f77bcf86cd799439011', 'should be a chunk id')
+ ).to.not.throw()
+ })
+
+ it('should not throw for valid postgres chunk ids', function () {
+ expect(() =>
+ assert.chunkId('123456789', 'should be a chunk id')
+ ).to.not.throw()
+ })
+
+ it('should throw for invalid chunk ids', function () {
+ try {
+ assert.chunkId('invalid-id', 'should be a chunk id')
+ expect.fail()
+ } catch (error) {
+ expect(error).to.be.instanceOf(TypeError)
+ expect(error.message).to.equal('should be a chunk id')
+ expect(OError.getFullInfo(error)).to.deep.equal({ arg: 'invalid-id' })
+ }
+ })
+
+ it('should throw for integer chunk ids', function () {
+ try {
+ assert.chunkId(12345, 'should be a chunk id')
+ expect.fail()
+ } catch (error) {
+ expect(error).to.be.instanceOf(TypeError)
+ expect(error.message).to.equal('should be a chunk id')
+ expect(OError.getFullInfo(error)).to.deep.equal({ arg: 12345 })
+ }
+ })
+ })
+
+ describe('mongoId', function () {
+ it('should not throw for valid mongo ids', function () {
+ expect(() =>
+ assert.mongoId('507f1f77bcf86cd799439011', 'should be a mongo id')
+ ).to.not.throw()
+ })
+
+ it('should throw for invalid mongo ids', function () {
+ try {
+ assert.mongoId('invalid-id', 'should be a mongo id')
+ expect.fail()
+ } catch (error) {
+ expect(error).to.be.instanceOf(TypeError)
+ expect(error.message).to.equal('should be a mongo id')
+ expect(OError.getFullInfo(error)).to.deep.equal({ arg: 'invalid-id' })
+ }
+ })
+
+ it('should throw for numeric mongo ids', function () {
+ try {
+ assert.mongoId('12345', 'should be a mongo id')
+ expect.fail()
+ } catch (error) {
+ expect(error).to.be.instanceOf(TypeError)
+ expect(error.message).to.equal('should be a mongo id')
+ expect(OError.getFullInfo(error)).to.deep.equal({ arg: '12345' })
+ }
+ })
+
+ it('should throw for mongo ids that are too short', function () {
+ try {
+ assert.mongoId('507f1f77bcf86cd79943901', 'should be a mongo id')
+ expect.fail()
+ } catch (error) {
+ expect(error).to.be.instanceOf(TypeError)
+ expect(error.message).to.equal('should be a mongo id')
+ expect(OError.getFullInfo(error)).to.deep.equal({
+ arg: '507f1f77bcf86cd79943901',
+ })
+ }
+ })
+
+ it('should throw for mongo ids that are too long', function () {
+ try {
+ assert.mongoId('507f1f77bcf86cd7994390111', 'should be a mongo id')
+ expect.fail()
+ } catch (error) {
+ expect(error).to.be.instanceOf(TypeError)
+ expect(error.message).to.equal('should be a mongo id')
+ expect(OError.getFullInfo(error)).to.deep.equal({
+ arg: '507f1f77bcf86cd7994390111',
+ })
+ }
+ })
+ })
+
+ describe('postgresId', function () {
+ it('should not throw for valid postgres ids', function () {
+ expect(() =>
+ assert.postgresId('123456789', 'should be a postgres id')
+ ).to.not.throw()
+ expect(() =>
+ assert.postgresId('1', 'should be a postgres id')
+ ).to.not.throw()
+ })
+
+ it('should throw for invalid postgres ids', function () {
+ try {
+ assert.postgresId('invalid-id', 'should be a postgres id')
+ expect.fail()
+ } catch (error) {
+ expect(error).to.be.instanceOf(TypeError)
+ expect(error.message).to.equal('should be a postgres id')
+ expect(OError.getFullInfo(error)).to.deep.equal({ arg: 'invalid-id' })
+ }
+ })
+
+ it('should throw for postgres ids starting with 0', function () {
+ try {
+ assert.postgresId('0123456', 'should be a postgres id')
+ expect.fail()
+ } catch (error) {
+ expect(error).to.be.instanceOf(TypeError)
+ expect(error.message).to.equal('should be a postgres id')
+ expect(OError.getFullInfo(error)).to.deep.equal({ arg: '0123456' })
+ }
+ })
+
+ it('should throw for postgres ids that are too long', function () {
+ try {
+ assert.postgresId('12345678901', 'should be a postgres id')
+ expect.fail()
+ } catch (error) {
+ expect(error).to.be.instanceOf(TypeError)
+ expect(error.message).to.equal('should be a postgres id')
+ expect(OError.getFullInfo(error)).to.deep.equal({ arg: '12345678901' })
+ }
+ })
+ })
+
+ describe('regex constants', function () {
+ it('MONGO_ID_REGEXP should match valid mongo ids', function () {
+ expect('507f1f77bcf86cd799439011').to.match(assert.MONGO_ID_REGEXP)
+ expect('abcdef0123456789abcdef01').to.match(assert.MONGO_ID_REGEXP)
+ })
+
+ it('MONGO_ID_REGEXP should not match invalid mongo ids', function () {
+ expect('invalid-id').to.not.match(assert.MONGO_ID_REGEXP)
+ expect('507f1f77bcf86cd79943901').to.not.match(assert.MONGO_ID_REGEXP) // too short
+ expect('507f1f77bcf86cd7994390111').to.not.match(assert.MONGO_ID_REGEXP) // too long
+ expect('507F1F77BCF86CD799439011').to.not.match(assert.MONGO_ID_REGEXP) // uppercase
+ })
+
+ it('POSTGRES_ID_REGEXP should match valid postgres ids', function () {
+ expect('123456789').to.match(assert.POSTGRES_ID_REGEXP)
+ expect('1').to.match(assert.POSTGRES_ID_REGEXP)
+ })
+
+ it('POSTGRES_ID_REGEXP should not match invalid postgres ids', function () {
+ expect('invalid-id').to.not.match(assert.POSTGRES_ID_REGEXP)
+ expect('0123456').to.not.match(assert.POSTGRES_ID_REGEXP) // starts with 0
+ expect('12345678901').to.not.match(assert.POSTGRES_ID_REGEXP) // too long (> 10 digits)
+ })
+ })
+})
diff --git a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs
index 5e9bf2edaf..b6cdd4b9bf 100644
--- a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs
+++ b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs
@@ -4,46 +4,29 @@ import Stream from 'node:stream'
import { setTimeout } from 'node:timers/promises'
import { promisify } from 'node:util'
import { ObjectId, Binary } from 'mongodb'
-import {
- db,
- backedUpBlobs,
- globalBlobs,
-} from '../../../../storage/lib/mongodb.js'
+import { db, globalBlobs } from '../../../../storage/lib/mongodb.js'
import cleanup from './support/cleanup.js'
import testProjects from '../api/support/test_projects.js'
import { execFile } from 'node:child_process'
-import { expect } from 'chai'
-import config from 'config'
-import ObjectPersistor from '@overleaf/object-persistor'
+import chai, { expect } from 'chai'
+import chaiExclude from 'chai-exclude'
import { WritableBuffer } from '@overleaf/stream-utils'
import {
backupPersistor,
projectBlobsBucket,
} from '../../../../storage/lib/backupPersistor.mjs'
-import projectKey from '../../../../storage/lib/project_key.js'
import {
BlobStore,
makeProjectKey,
} from '../../../../storage/lib/blob_store/index.js'
-const TIMEOUT = 20 * 1_000
+import { mockFilestore } from './support/MockFilestore.mjs'
-const { deksBucket } = config.get('backupStore')
-const { tieringStorageClass } = config.get('backupPersistor')
+chai.use(chaiExclude)
+const TIMEOUT = 20 * 1_000
const projectsCollection = db.collection('projects')
const deletedProjectsCollection = db.collection('deletedProjects')
-const deletedFilesCollection = db.collection('deletedFiles')
-
-const FILESTORE_PERSISTOR = ObjectPersistor({
- backend: 'gcs',
- gcs: {
- endpoint: {
- apiEndpoint: process.env.GCS_API_ENDPOINT,
- projectId: process.env.GCS_PROJECT_ID,
- },
- },
-})
/**
* @param {ObjectId} objectId
@@ -72,17 +55,6 @@ function binaryForGitBlobHash(gitBlobHash) {
return new Binary(Buffer.from(gitBlobHash, 'hex'))
}
-async function listS3Bucket(bucket, wantStorageClass) {
- const client = backupPersistor._getClientForBucket(bucket)
- const response = await client.listObjectsV2({ Bucket: bucket }).promise()
-
- for (const object of response.Contents || []) {
- expect(object).to.have.property('StorageClass', wantStorageClass)
- }
-
- return (response.Contents || []).map(item => item.Key || '')
-}
-
function objectIdFromTime(timestamp) {
return ObjectId.createFromTime(new Date(timestamp).getTime() / 1000)
}
@@ -128,11 +100,8 @@ describe('back_fill_file_hash script', function () {
const fileId7 = objectIdFromTime('2017-02-01T00:07:00Z')
const fileId8 = objectIdFromTime('2017-02-01T00:08:00Z')
const fileId9 = objectIdFromTime('2017-02-01T00:09:00Z')
- const fileIdDeleted1 = objectIdFromTime('2017-03-01T00:01:00Z')
- const fileIdDeleted2 = objectIdFromTime('2017-03-01T00:02:00Z')
- const fileIdDeleted3 = objectIdFromTime('2017-03-01T00:03:00Z')
- const fileIdDeleted4 = objectIdFromTime('2024-03-01T00:04:00Z')
- const fileIdDeleted5 = objectIdFromTime('2024-03-01T00:05:00Z')
+ const fileId10 = objectIdFromTime('2017-02-01T00:10:00Z')
+ const fileId11 = objectIdFromTime('2017-02-01T00:11:00Z')
const contentTextBlob0 = Buffer.from('Hello 0')
const hashTextBlob0 = gitBlobHashBuffer(contentTextBlob0)
const contentTextBlob1 = Buffer.from('Hello 1')
@@ -159,7 +128,6 @@ describe('back_fill_file_hash script', function () {
hash: hashFile7,
content: contentFile7,
},
- { projectId: projectId0, historyId: historyId0, fileId: fileIdDeleted5 },
{
projectId: projectId0,
historyId: historyId0,
@@ -179,7 +147,6 @@ describe('back_fill_file_hash script', function () {
content: contentTextBlob2,
},
{ projectId: projectId1, historyId: historyId1, fileId: fileId1 },
- { projectId: projectId1, historyId: historyId1, fileId: fileIdDeleted1 },
{
projectId: projectId2,
historyId: historyId2,
@@ -187,23 +154,28 @@ describe('back_fill_file_hash script', function () {
hasHash: true,
},
{ projectId: projectId3, historyId: historyId3, fileId: fileId3 },
+ // fileId10 is dupe of fileId3, without a hash
+ {
+ projectId: projectId3,
+ historyId: historyId3,
+ fileId: fileId10,
+ content: Buffer.from(fileId3.toString()),
+ hash: gitBlobHash(fileId3),
+ },
+ // fileId11 is dupe of fileId3, but with a hash
+ {
+ projectId: projectId3,
+ historyId: historyId3,
+ fileId: fileId11,
+ content: Buffer.from(fileId3.toString()),
+ hash: gitBlobHash(fileId3),
+ hasHash: true,
+ },
{
projectId: projectIdDeleted0,
historyId: historyIdDeleted0,
fileId: fileId4,
},
- {
- projectId: projectIdDeleted0,
- historyId: historyIdDeleted0,
- fileId: fileIdDeleted2,
- },
- // { historyId: historyIdDeleted0, fileId:fileIdDeleted3 }, // fileIdDeleted3 is dupe of fileIdDeleted2
- {
- projectId: projectIdDeleted0,
- historyId: historyIdDeleted0,
- fileId: fileIdDeleted4,
- hasHash: true,
- },
{
projectId: projectIdDeleted1,
historyId: historyIdDeleted1,
@@ -231,10 +203,6 @@ describe('back_fill_file_hash script', function () {
fileId4,
fileId5,
fileId6,
- fileIdDeleted1,
- fileIdDeleted2,
- fileIdDeleted3,
- fileIdDeleted4,
}
console.log({
projectId0,
@@ -260,15 +228,7 @@ describe('back_fill_file_hash script', function () {
}
}
- beforeEach(cleanup.everything)
- beforeEach('cleanup s3 buckets', async function () {
- await backupPersistor.deleteDirectory(deksBucket, '')
- await backupPersistor.deleteDirectory(projectBlobsBucket, '')
- expect(await listS3Bucket(deksBucket)).to.have.length(0)
- expect(await listS3Bucket(projectBlobsBucket)).to.have.length(0)
- })
-
- beforeEach('populate mongo', async function () {
+ async function populateMongo() {
await globalBlobs.insertMany([
{ _id: gitBlobHash(fileId6), byteLength: 24, stringLength: 24 },
{ _id: gitBlobHash(fileId8), byteLength: 24, stringLength: 24 },
@@ -334,7 +294,11 @@ describe('back_fill_file_hash script', function () {
fileRefs: [],
folders: [
{
- fileRefs: [{ _id: fileId3 }],
+ fileRefs: [
+ { _id: fileId3 },
+ { _id: fileId10 },
+ { _id: fileId11, hash: gitBlobHash(fileId3) },
+ ],
folders: [],
},
],
@@ -452,18 +416,9 @@ describe('back_fill_file_hash script', function () {
},
},
])
- await deletedFilesCollection.insertMany([
- { _id: fileIdDeleted1, projectId: projectId1 },
- { _id: fileIdDeleted2, projectId: projectIdDeleted0 },
- { _id: fileIdDeleted3, projectId: projectIdDeleted0 },
- {
- _id: fileIdDeleted4,
- projectId: projectIdDeleted0,
- hash: gitBlobHash(fileIdDeleted4),
- },
- { _id: fileIdDeleted5, projectId: projectId0 },
- ])
+ }
+ async function populateHistoryV1() {
await Promise.all([
testProjects.createEmptyProject(historyId0.toString()),
testProjects.createEmptyProject(historyId1),
@@ -485,81 +440,43 @@ describe('back_fill_file_hash script', function () {
await blobStore2.putString(contentTextBlob2.toString())
const blobStoreBadFileTree = new BlobStore(historyIdBadFileTree0.toString())
await blobStoreBadFileTree.putString(contentTextBlob3.toString())
- })
+ }
- beforeEach('populate filestore', async function () {
- await FILESTORE_PERSISTOR.sendStream(
- USER_FILES_BUCKET_NAME,
- `${projectId0}/${fileId0}`,
- Stream.Readable.from([fileId0.toString()])
+ async function populateFilestore() {
+ await mockFilestore.addFile(projectId0, fileId0, fileId0.toString())
+ await mockFilestore.addFile(projectId0, fileId6, fileId6.toString())
+ await mockFilestore.addFile(projectId0, fileId7, contentFile7)
+ await mockFilestore.addFile(projectId1, fileId1, fileId1.toString())
+ await mockFilestore.addFile(projectId2, fileId2, fileId2.toString())
+ await mockFilestore.addFile(projectId3, fileId3, fileId3.toString())
+ await mockFilestore.addFile(
+ projectId3,
+ fileId10,
+ // fileId10 is dupe of fileId3
+ fileId3.toString()
)
- await FILESTORE_PERSISTOR.sendStream(
- USER_FILES_BUCKET_NAME,
- `${projectId0}/${fileId6}`,
- Stream.Readable.from([fileId6.toString()])
+ await mockFilestore.addFile(
+ projectId3,
+ fileId11,
+ // fileId11 is dupe of fileId3
+ fileId3.toString()
)
- await FILESTORE_PERSISTOR.sendStream(
- USER_FILES_BUCKET_NAME,
- `${projectId0}/${fileId7}`,
- Stream.Readable.from([contentFile7])
+ await mockFilestore.addFile(projectIdDeleted0, fileId4, fileId4.toString())
+ await mockFilestore.addFile(projectIdDeleted1, fileId5, fileId5.toString())
+ await mockFilestore.addFile(
+ projectIdBadFileTree3,
+ fileId9,
+ fileId9.toString()
)
- await FILESTORE_PERSISTOR.sendStream(
- USER_FILES_BUCKET_NAME,
- `${projectId0}/${fileIdDeleted5}`,
- Stream.Readable.from([fileIdDeleted5.toString()])
- )
- await FILESTORE_PERSISTOR.sendStream(
- USER_FILES_BUCKET_NAME,
- `${projectId1}/${fileId1}`,
- Stream.Readable.from([fileId1.toString()])
- )
- await FILESTORE_PERSISTOR.sendStream(
- USER_FILES_BUCKET_NAME,
- `${projectId2}/${fileId2}`,
- Stream.Readable.from([fileId2.toString()])
- )
- await FILESTORE_PERSISTOR.sendStream(
- USER_FILES_BUCKET_NAME,
- `${projectId3}/${fileId3}`,
- Stream.Readable.from([fileId3.toString()])
- )
- await FILESTORE_PERSISTOR.sendStream(
- USER_FILES_BUCKET_NAME,
- `${projectIdDeleted0}/${fileId4}`,
- Stream.Readable.from([fileId4.toString()])
- )
- await FILESTORE_PERSISTOR.sendStream(
- USER_FILES_BUCKET_NAME,
- `${projectIdDeleted1}/${fileId5}`,
- Stream.Readable.from([fileId5.toString()])
- )
- await FILESTORE_PERSISTOR.sendStream(
- USER_FILES_BUCKET_NAME,
- `${projectId1}/${fileIdDeleted1}`,
- Stream.Readable.from([fileIdDeleted1.toString()])
- )
- await FILESTORE_PERSISTOR.sendStream(
- USER_FILES_BUCKET_NAME,
- `${projectIdDeleted0}/${fileIdDeleted2}`,
- Stream.Readable.from([fileIdDeleted2.toString()])
- )
- await FILESTORE_PERSISTOR.sendStream(
- USER_FILES_BUCKET_NAME,
- `${projectIdDeleted0}/${fileIdDeleted3}`,
- // same content as 2, deduplicate
- Stream.Readable.from([fileIdDeleted2.toString()])
- )
- await FILESTORE_PERSISTOR.sendStream(
- USER_FILES_BUCKET_NAME,
- `${projectIdDeleted0}/${fileIdDeleted4}`,
- Stream.Readable.from([fileIdDeleted4.toString()])
- )
- await FILESTORE_PERSISTOR.sendStream(
- USER_FILES_BUCKET_NAME,
- `${projectIdBadFileTree3}/${fileId9}`,
- Stream.Readable.from([fileId9.toString()])
- )
- })
+ }
+
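+ // Combined setup helper replacing the previous beforeEach hooks, so the
+ // relatively slow mongo/history-v1/filestore population can be invoked
+ // explicitly where it is needed.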
+ async function prepareEnvironment() {
+ await cleanup.everything()
+ await mockFilestore.start()
+ await populateMongo()
+ await populateHistoryV1()
+ await populateFilestore()
+ }
/**
* @param {Array} args
@@ -576,7 +493,6 @@ describe('back_fill_file_hash script', function () {
'storage/scripts/back_fill_file_hash.mjs',
'--processNonDeletedProjects=true',
'--processDeletedProjects=true',
- '--processDeletedFiles=true',
...args,
],
{
@@ -585,6 +501,7 @@ describe('back_fill_file_hash script', function () {
env: {
...process.env,
USER_FILES_BUCKET_NAME,
+ SLEEP_BEFORE_EXIT: '1',
...env,
LOG_LEVEL: 'warn', // Override LOG_LEVEL of acceptance tests
},
@@ -601,11 +518,7 @@ describe('back_fill_file_hash script', function () {
expect((await fs.promises.readdir('/tmp')).join(';')).to.not.match(
/back_fill_file_hash/
)
- const extraStatsKeys = [
- 'eventLoop',
- 'readFromGCSThroughputMiBPerSecond',
- 'writeToAWSThroughputMiBPerSecond',
- ]
+ const extraStatsKeys = ['eventLoop', 'readFromGCSThroughputMiBPerSecond']
const stats = JSON.parse(
result.stderr
.split('\n')
@@ -620,7 +533,6 @@ describe('back_fill_file_hash script', function () {
delete stats.time
if (shouldHaveWritten) {
expect(stats.readFromGCSThroughputMiBPerSecond).to.be.greaterThan(0)
- expect(stats.writeToAWSThroughputMiBPerSecond).to.be.greaterThan(0)
}
for (const key of extraStatsKeys) {
delete stats[key]
@@ -658,116 +570,131 @@ describe('back_fill_file_hash script', function () {
return !hasHash // only files without hash processed
})
it('should update mongo', async function () {
- expect(await projectsCollection.find({}).toArray()).to.deep.equal([
- {
- _id: projectId0,
- rootFolder: [
- {
- fileRefs: [
- { _id: fileId8, hash: gitBlobHash(fileId8) },
- { _id: fileId0, hash: gitBlobHash(fileId0) },
- { _id: fileId6, hash: gitBlobHash(fileId6) },
- { _id: fileId7, hash: hashFile7 },
- ],
- folders: [{ fileRefs: [], folders: [] }],
- },
- ],
- overleaf: { history: { id: historyId0 } },
- },
- {
- _id: projectId1,
- rootFolder: [
- {
- fileRefs: [{ _id: fileId1, hash: gitBlobHash(fileId1) }],
- folders: [
- {
- fileRefs: [],
- folders: [
- {
- fileRefs: [{ _id: fileId1, hash: gitBlobHash(fileId1) }],
- folders: [],
- },
- ],
- },
- ],
- },
- ],
- overleaf: { history: { id: historyId1 } },
- },
- {
- _id: projectId2,
- rootFolder: [
- {
- fileRefs: [],
- folders: [
- {
- fileRefs: [],
- folders: [
- {
- fileRefs: [{ _id: fileId2, hash: gitBlobHash(fileId2) }],
- folders: [],
- },
- ],
- },
- ],
- },
- ],
- overleaf: { history: { id: historyId2 } },
- },
- {
- _id: projectId3,
- rootFolder: [
- {
- fileRefs: [],
- folders: [
- {
- fileRefs: [],
- folders: [
- {
- fileRefs: [{ _id: fileId3, hash: gitBlobHash(fileId3) }],
- folders: [],
- },
- ],
- },
- ],
- },
- ],
- overleaf: { history: { id: historyId3 } },
- },
- {
- _id: projectIdNoHistory,
- rootFolder: [{ fileRefs: [], folders: [] }],
- overleaf: { history: { conversionFailed: true } },
- },
- {
- _id: projectIdNoOverleaf,
- rootFolder: [{ fileRefs: [], folders: [] }],
- },
- {
- _id: projectIdBadFileTree0,
- overleaf: { history: { id: historyIdBadFileTree0 } },
- },
- {
- _id: projectIdBadFileTree1,
- rootFolder: [],
- overleaf: { history: { id: historyIdBadFileTree1 } },
- },
- {
- _id: projectIdBadFileTree2,
- rootFolder: [{ fileRefs: [{ _id: null }] }],
- overleaf: { history: { id: historyIdBadFileTree2 } },
- },
- {
- _id: projectIdBadFileTree3,
- rootFolder: [
- {
- folders: [null, { folders: {}, fileRefs: 13 }],
- fileRefs: [{ _id: fileId9, hash: gitBlobHash(fileId9) }],
- },
- ],
- overleaf: { history: { id: historyIdBadFileTree3 } },
- },
- ])
+ expect(await projectsCollection.find({}).toArray())
+ .excludingEvery([
+ 'currentEndTimestamp',
+ 'currentEndVersion',
+ 'updatedAt',
+ 'backup',
+ ])
+ .to.deep.equal([
+ {
+ _id: projectId0,
+ rootFolder: [
+ {
+ fileRefs: [
+ { _id: fileId8, hash: gitBlobHash(fileId8) },
+ { _id: fileId0, hash: gitBlobHash(fileId0) },
+ { _id: fileId6, hash: gitBlobHash(fileId6) },
+ { _id: fileId7, hash: hashFile7 },
+ ],
+ folders: [{ fileRefs: [], folders: [] }],
+ },
+ ],
+ overleaf: { history: { id: historyId0 } },
+ },
+ {
+ _id: projectId1,
+ rootFolder: [
+ {
+ fileRefs: [{ _id: fileId1, hash: gitBlobHash(fileId1) }],
+ folders: [
+ {
+ fileRefs: [],
+ folders: [
+ {
+ fileRefs: [
+ { _id: fileId1, hash: gitBlobHash(fileId1) },
+ ],
+ folders: [],
+ },
+ ],
+ },
+ ],
+ },
+ ],
+ overleaf: { history: { id: historyId1 } },
+ },
+ {
+ _id: projectId2,
+ rootFolder: [
+ {
+ fileRefs: [],
+ folders: [
+ {
+ fileRefs: [],
+ folders: [
+ {
+ fileRefs: [
+ { _id: fileId2, hash: gitBlobHash(fileId2) },
+ ],
+ folders: [],
+ },
+ ],
+ },
+ ],
+ },
+ ],
+ overleaf: { history: { id: historyId2 } },
+ },
+ {
+ _id: projectId3,
+ rootFolder: [
+ {
+ fileRefs: [],
+ folders: [
+ {
+ fileRefs: [],
+ folders: [
+ {
+ fileRefs: [
+ { _id: fileId3, hash: gitBlobHash(fileId3) },
+ { _id: fileId10, hash: gitBlobHash(fileId3) },
+ { _id: fileId11, hash: gitBlobHash(fileId3) },
+ ],
+ folders: [],
+ },
+ ],
+ },
+ ],
+ },
+ ],
+ overleaf: { history: { id: historyId3 } },
+ },
+ {
+ _id: projectIdNoHistory,
+ rootFolder: [{ fileRefs: [], folders: [] }],
+ overleaf: { history: { conversionFailed: true } },
+ },
+ {
+ _id: projectIdNoOverleaf,
+ rootFolder: [{ fileRefs: [], folders: [] }],
+ },
+ {
+ _id: projectIdBadFileTree0,
+ overleaf: { history: { id: historyIdBadFileTree0 } },
+ },
+ {
+ _id: projectIdBadFileTree1,
+ rootFolder: [],
+ overleaf: { history: { id: historyIdBadFileTree1 } },
+ },
+ {
+ _id: projectIdBadFileTree2,
+ rootFolder: [{ fileRefs: [{ _id: null }] }],
+ overleaf: { history: { id: historyIdBadFileTree2 } },
+ },
+ {
+ _id: projectIdBadFileTree3,
+ rootFolder: [
+ {
+ folders: [null, { folders: {}, fileRefs: 13 }],
+ fileRefs: [{ _id: fileId9, hash: gitBlobHash(fileId9) }],
+ },
+ ],
+ overleaf: { history: { id: historyIdBadFileTree3 } },
+ },
+ ])
expect(await deletedProjectsCollection.find({}).toArray()).to.deep.equal([
{
_id: deleteProjectsRecordId0,
@@ -851,167 +778,6 @@ describe('back_fill_file_hash script', function () {
},
},
])
- expect(await deletedFilesCollection.find({}).toArray()).to.deep.equal([
- {
- _id: fileIdDeleted1,
- projectId: projectId1,
- hash: gitBlobHash(fileIdDeleted1),
- },
- {
- _id: fileIdDeleted2,
- projectId: projectIdDeleted0,
- hash: gitBlobHash(fileIdDeleted2),
- },
- {
- _id: fileIdDeleted3,
- projectId: projectIdDeleted0,
- // uses the same content as fileIdDeleted2
- hash: gitBlobHash(fileIdDeleted2),
- },
- {
- _id: fileIdDeleted4,
- projectId: projectIdDeleted0,
- hash: gitBlobHash(fileIdDeleted4),
- },
- {
- _id: fileIdDeleted5,
- projectId: projectId0,
- hash: gitBlobHash(fileIdDeleted5),
- },
- ])
- expect(
- (await backedUpBlobs.find({}, { sort: { _id: 1 } }).toArray()).map(
- entry => {
- // blobs are pushed unordered into mongo. Sort the list for consistency.
- entry.blobs.sort()
- return entry
- }
- )
- ).to.deep.equal([
- {
- _id: projectId0,
- blobs: [
- binaryForGitBlobHash(gitBlobHash(fileId0)),
- binaryForGitBlobHash(hashFile7),
- binaryForGitBlobHash(gitBlobHash(fileIdDeleted5)),
- binaryForGitBlobHash(hashTextBlob0),
- ].sort(),
- },
- {
- _id: projectId1,
- blobs: [
- binaryForGitBlobHash(gitBlobHash(fileId1)),
- binaryForGitBlobHash(gitBlobHash(fileIdDeleted1)),
- binaryForGitBlobHash(hashTextBlob1),
- ].sort(),
- },
- {
- _id: projectId2,
- blobs: [binaryForGitBlobHash(hashTextBlob2)]
- .concat(
- processHashedFiles
- ? [binaryForGitBlobHash(gitBlobHash(fileId2))]
- : []
- )
- .sort(),
- },
- {
- _id: projectIdDeleted0,
- blobs: [
- binaryForGitBlobHash(gitBlobHash(fileId4)),
- binaryForGitBlobHash(gitBlobHash(fileIdDeleted2)),
- ]
- .concat(
- processHashedFiles
- ? [binaryForGitBlobHash(gitBlobHash(fileIdDeleted4))]
- : []
- )
- .sort(),
- },
- {
- _id: projectId3,
- blobs: [binaryForGitBlobHash(gitBlobHash(fileId3))].sort(),
- },
- ...(processHashedFiles
- ? [
- {
- _id: projectIdDeleted1,
- blobs: [binaryForGitBlobHash(gitBlobHash(fileId5))].sort(),
- },
- ]
- : []),
- {
- _id: projectIdBadFileTree0,
- blobs: [binaryForGitBlobHash(hashTextBlob3)].sort(),
- },
- {
- _id: projectIdBadFileTree3,
- blobs: [binaryForGitBlobHash(gitBlobHash(fileId9))].sort(),
- },
- ])
- })
- it('should process nothing on re-run', async function () {
- const rerun = await runScript(
- processHashedFiles ? ['--processHashedFiles=true'] : [],
- {},
- false
- )
- let stats = {
- ...STATS_ALL_ZERO,
- // We still need to iterate over all the projects and blobs.
- projects: 10,
- blobs: 13,
- backedUpBlobs: 13,
- badFileTrees: 4,
- }
- if (processHashedFiles) {
- stats = sumStats(stats, {
- ...STATS_ALL_ZERO,
- blobs: 3,
- backedUpBlobs: 3,
- })
- }
- expect(rerun.stats).deep.equal(stats)
- })
- it('should have backed up all the files', async function () {
- expect(tieringStorageClass).to.exist
- const blobs = await listS3Bucket(projectBlobsBucket, tieringStorageClass)
- expect(blobs.sort()).to.deep.equal(
- writtenBlobs
- .map(({ historyId, fileId, hash }) =>
- makeProjectKey(historyId, hash || gitBlobHash(fileId))
- )
- .sort()
- )
- for (let { historyId, fileId, hash, content } of writtenBlobs) {
- hash = hash || gitBlobHash(fileId.toString())
- const s = await backupPersistor.getObjectStream(
- projectBlobsBucket,
- makeProjectKey(historyId, hash),
- { autoGunzip: true }
- )
- const buf = new WritableBuffer()
- await Stream.promises.pipeline(s, buf)
- expect(gitBlobHashBuffer(buf.getContents())).to.equal(hash)
- if (content) {
- expect(buf.getContents()).to.deep.equal(content)
- } else {
- const id = buf.getContents().toString('utf-8')
- expect(id).to.equal(fileId.toString())
- // double check we are not comparing 'undefined' or '[object Object]' above
- expect(id).to.match(/^[a-f0-9]{24}$/)
- }
- }
- const deks = await listS3Bucket(deksBucket, 'STANDARD')
- expect(deks.sort()).to.deep.equal(
- Array.from(
- new Set(
- writtenBlobs.map(
- ({ historyId }) => projectKey.format(historyId) + '/dek'
- )
- )
- ).sort()
- )
})
it('should have written the back filled files to history v1', async function () {
for (const { historyId, hash, fileId, content } of writtenBlobs) {
@@ -1031,6 +797,30 @@ describe('back_fill_file_hash script', function () {
expect(id).to.match(/^[a-f0-9]{24}$/)
}
})
+  // Technically, the test below should run in its own environment to ensure it does not impact any other assertions.
+  // Practically, that is slow, and moving it to the end of the tests gets us most of the way there.
+ it('should process nothing on re-run', async function () {
+ const rerun = await runScript(
+ processHashedFiles ? ['--processHashedFiles=true'] : [],
+ {},
+ false
+ )
+ let stats = {
+ ...STATS_ALL_ZERO,
+ // We still need to iterate over all the projects and blobs.
+ projects: 10,
+ blobs: 10,
+ badFileTrees: 4,
+ }
+ if (processHashedFiles) {
+ stats = sumStats(stats, {
+ ...STATS_ALL_ZERO,
+ blobs: 2,
+ })
+ }
+ expect(rerun.stats).deep.equal(stats)
+ })
}
function expectNotFoundError(result, msg) {
@@ -1052,7 +842,6 @@ describe('back_fill_file_hash script', function () {
const STATS_ALL_ZERO = {
projects: 0,
blobs: 0,
- backedUpBlobs: 0,
filesWithHash: 0,
filesWithoutHash: 0,
filesDuplicated: 0,
@@ -1066,23 +855,16 @@ describe('back_fill_file_hash script', function () {
fileHardDeleted: 0,
badFileTrees: 0,
mongoUpdates: 0,
- deduplicatedWriteToAWSLocalCount: 0,
- deduplicatedWriteToAWSLocalEgress: 0,
- deduplicatedWriteToAWSRemoteCount: 0,
- deduplicatedWriteToAWSRemoteEgress: 0,
readFromGCSCount: 0,
readFromGCSIngress: 0,
- writeToAWSCount: 0,
- writeToAWSEgress: 0,
writeToGCSCount: 0,
writeToGCSEgress: 0,
}
const STATS_UP_TO_PROJECT1 = {
projects: 2,
blobs: 2,
- backedUpBlobs: 0,
filesWithHash: 0,
- filesWithoutHash: 7,
+ filesWithoutHash: 5,
filesDuplicated: 1,
filesRetries: 0,
filesFailed: 0,
@@ -1093,24 +875,17 @@ describe('back_fill_file_hash script', function () {
projectHardDeleted: 0,
fileHardDeleted: 0,
badFileTrees: 0,
- mongoUpdates: 6,
- deduplicatedWriteToAWSLocalCount: 0,
- deduplicatedWriteToAWSLocalEgress: 0,
- deduplicatedWriteToAWSRemoteCount: 0,
- deduplicatedWriteToAWSRemoteEgress: 0,
- readFromGCSCount: 8,
- readFromGCSIngress: 4000134,
- writeToAWSCount: 7,
- writeToAWSEgress: 4086,
- writeToGCSCount: 5,
- writeToGCSEgress: 4000096,
+ mongoUpdates: 2, // 4-2 blobs written to backedUpBlobs collection
+ readFromGCSCount: 4,
+ readFromGCSIngress: 4000072,
+ writeToGCSCount: 3,
+ writeToGCSEgress: 4000048,
}
const STATS_UP_FROM_PROJECT1_ONWARD = {
projects: 8,
blobs: 2,
- backedUpBlobs: 0,
filesWithHash: 0,
- filesWithoutHash: 5,
+ filesWithoutHash: 4,
filesDuplicated: 0,
filesRetries: 0,
filesFailed: 0,
@@ -1121,28 +896,20 @@ describe('back_fill_file_hash script', function () {
projectHardDeleted: 0,
fileHardDeleted: 0,
badFileTrees: 4,
- mongoUpdates: 10,
- deduplicatedWriteToAWSLocalCount: 1,
- deduplicatedWriteToAWSLocalEgress: 30,
- deduplicatedWriteToAWSRemoteCount: 0,
- deduplicatedWriteToAWSRemoteEgress: 0,
- readFromGCSCount: 7,
- readFromGCSIngress: 134,
- writeToAWSCount: 6,
- writeToAWSEgress: 173,
- writeToGCSCount: 4,
- writeToGCSEgress: 96,
+ mongoUpdates: 3, // previously 5 blobs written to backedUpBlobs collection
+ readFromGCSCount: 4,
+ readFromGCSIngress: 96,
+ writeToGCSCount: 3,
+ writeToGCSEgress: 72,
}
const STATS_FILES_HASHED_EXTRA = {
...STATS_ALL_ZERO,
- filesWithHash: 3,
- mongoUpdates: 1,
- readFromGCSCount: 3,
- readFromGCSIngress: 72,
- writeToAWSCount: 3,
- writeToAWSEgress: 89,
- writeToGCSCount: 3,
- writeToGCSEgress: 72,
+ filesWithHash: 2,
+ mongoUpdates: 0, // previously 2 blobs written to backedUpBlobs collection
+ readFromGCSCount: 2,
+ readFromGCSIngress: 48,
+ writeToGCSCount: 2,
+ writeToGCSEgress: 48,
}
function sumStats(a, b) {
@@ -1154,78 +921,73 @@ describe('back_fill_file_hash script', function () {
STATS_UP_FROM_PROJECT1_ONWARD
)
- it('should gracefully handle fatal errors', async function () {
- await FILESTORE_PERSISTOR.deleteObject(
- USER_FILES_BUCKET_NAME,
- `${projectId0}/${fileId0}`
- )
- const t0 = Date.now()
- const { stats, result } = await tryRunScript([], {
- RETRIES: '10',
- RETRY_DELAY_MS: '1000',
- })
- const t1 = Date.now()
- expectNotFoundError(result, 'failed to process file')
- expect(result.status).to.equal(1)
- expect(stats).to.deep.equal(
- sumStats(STATS_ALL, {
- ...STATS_ALL_ZERO,
- filesFailed: 1,
- readFromGCSIngress: -24,
- writeToAWSCount: -1,
- writeToAWSEgress: -28,
- writeToGCSCount: -1,
- writeToGCSEgress: -24,
- })
- )
- // should not retry 404
- expect(result.stdout).to.not.include('failed to process file, trying again')
- expect(t1 - t0).to.be.below(10_000)
- })
+ describe('error cases', () => {
+ beforeEach('prepare environment', prepareEnvironment)
- it('should retry on error', async function () {
- await FILESTORE_PERSISTOR.deleteObject(
- USER_FILES_BUCKET_NAME,
- `${projectId0}/${fileId0}`
- )
- const restoreFileAfter5s = async () => {
- await setTimeout(5_000)
- await FILESTORE_PERSISTOR.sendStream(
- USER_FILES_BUCKET_NAME,
- `${projectId0}/${fileId0}`,
- Stream.Readable.from([fileId0.toString()])
+ it('should gracefully handle fatal errors', async function () {
+ mockFilestore.deleteObject(projectId0, fileId0)
+ const t0 = Date.now()
+ const { stats, result } = await tryRunScript([], {
+ RETRIES: '10',
+ RETRY_DELAY_MS: '1000',
+ })
+ const t1 = Date.now()
+ expectNotFoundError(result, 'failed to process file')
+ expect(result.status).to.equal(1)
+ expect(stats).to.deep.equal(
+ sumStats(STATS_ALL, {
+ ...STATS_ALL_ZERO,
+ filesFailed: 1,
+ readFromGCSIngress: -24,
+ writeToGCSCount: -1,
+ writeToGCSEgress: -24,
+ })
+ )
+ // should not retry 404
+ expect(result.stdout).to.not.include(
+ 'failed to process file, trying again'
+ )
+ expect(t1 - t0).to.be.below(10_000)
+ })
+
+ it('should retry on error', async function () {
+ mockFilestore.deleteObject(projectId0, fileId0)
+ const restoreFileAfter5s = async () => {
+ await setTimeout(5_000)
+ mockFilestore.addFile(projectId0, fileId0, fileId0.toString())
+ }
+      // use Promise.allSettled to ensure the above addFile call finishes before this test completes
+ const [
+ {
+ value: { stats, result },
+ },
+ ] = await Promise.allSettled([
+ tryRunScript([], {
+ RETRY_DELAY_MS: '100',
+ RETRIES: '60',
+ RETRY_FILESTORE_404: 'true', // 404s are the easiest to simulate in tests
+ }),
+ restoreFileAfter5s(),
+ ])
+ expectNotFoundError(result, 'failed to process file, trying again')
+ expect(result.status).to.equal(0)
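+      // Zero out the retry-dependent fields for the deep comparison; they are asserted separately below.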
+ expect({ ...stats, filesRetries: 0, readFromGCSCount: 0 }).to.deep.equal({
+ ...STATS_ALL,
+ filesRetries: 0,
+ readFromGCSCount: 0,
+ })
+ expect(stats.filesRetries).to.be.greaterThan(0, 'should have retried')
+ expect(stats.readFromGCSCount).to.be.greaterThan(
+ STATS_ALL.readFromGCSCount,
+ 'should have read more times from GCS compared to normal operations'
)
- }
- // use Promise.allSettled to ensure the above sendStream call finishes before this test completes
- const [
- {
- value: { stats, result },
- },
- ] = await Promise.allSettled([
- tryRunScript([], {
- RETRY_DELAY_MS: '100',
- RETRIES: '60',
- RETRY_FILESTORE_404: 'true', // 404s are the easiest to simulate in tests
- }),
- restoreFileAfter5s(),
- ])
- expectNotFoundError(result, 'failed to process file, trying again')
- expect(result.status).to.equal(0)
- expect({ ...stats, filesRetries: 0, readFromGCSCount: 0 }).to.deep.equal({
- ...STATS_ALL,
- filesRetries: 0,
- readFromGCSCount: 0,
})
- expect(stats.filesRetries).to.be.greaterThan(0, 'should have retried')
- expect(stats.readFromGCSCount).to.be.greaterThan(
- STATS_ALL.readFromGCSCount,
- 'should have read more times from GCS compared to normal operations'
- )
})
describe('full run CONCURRENCY=1', function () {
let output
- beforeEach('run script', async function () {
+ before('prepare environment', prepareEnvironment)
+ before('run script', async function () {
output = await runScript([], {
CONCURRENCY: '1',
})
@@ -1234,6 +996,7 @@ describe('back_fill_file_hash script', function () {
/**
* @param {ObjectId} projectId
* @param {string} msg
+ * @param {string} path
*/
function expectBadFileTreeMessage(projectId, msg, path) {
const line = output.result.stdout
@@ -1288,29 +1051,35 @@ describe('back_fill_file_hash script', function () {
)
})
commonAssertions()
+ })
- describe('when processing hashed files later', function () {
- let output
- beforeEach('run script', async function () {
- output = await runScript(['--processHashedFiles=true'], {})
- })
- it('should print stats', function () {
- expect(output.stats).deep.equal({
- ...STATS_FILES_HASHED_EXTRA,
- projects: 10,
- blobs: 13,
- backedUpBlobs: 13,
- badFileTrees: 4,
- mongoUpdates: 3,
- })
- })
- commonAssertions(true)
+ describe('when processing hashed files later', function () {
+ let output1, output2
+ before('prepare environment', prepareEnvironment)
+ before('run script without hashed files', async function () {
+ output1 = await runScript([], {})
})
+ before('run script with hashed files', async function () {
+ output2 = await runScript(['--processHashedFiles=true'], {})
+ })
+ it('should print stats for the first run without hashed files', function () {
+ expect(output1.stats).deep.equal(STATS_ALL)
+ })
+ it('should print stats for the hashed files run', function () {
+ expect(output2.stats).deep.equal({
+ ...STATS_FILES_HASHED_EXTRA,
+ projects: 10,
+ blobs: 10,
+ badFileTrees: 4,
+ })
+ })
+ commonAssertions(true)
})
describe('full run CONCURRENCY=10', function () {
let output
- beforeEach('run script', async function () {
+ before('prepare environment', prepareEnvironment)
+ before('run script', async function () {
output = await runScript([], {
CONCURRENCY: '10',
})
@@ -1323,7 +1092,8 @@ describe('back_fill_file_hash script', function () {
describe('full run STREAM_HIGH_WATER_MARK=1MB', function () {
let output
- beforeEach('run script', async function () {
+ before('prepare environment', prepareEnvironment)
+ before('run script', async function () {
output = await runScript([], {
STREAM_HIGH_WATER_MARK: (1024 * 1024).toString(),
})
@@ -1336,19 +1106,27 @@ describe('back_fill_file_hash script', function () {
describe('when processing hashed files', function () {
let output
- beforeEach('run script', async function () {
+ before('prepare environment', prepareEnvironment)
+ before('run script', async function () {
output = await runScript(['--processHashedFiles=true'], {})
})
it('should print stats', function () {
expect(output.stats).deep.equal(
- sumStats(STATS_ALL, STATS_FILES_HASHED_EXTRA)
+ sumStats(STATS_ALL, {
+ ...STATS_FILES_HASHED_EXTRA,
+ readFromGCSCount: 3,
+ readFromGCSIngress: 72,
+ mongoUpdates: 0,
+ filesWithHash: 3,
+ })
)
})
commonAssertions(true)
})
describe('with something in the bucket already', function () {
- beforeEach('create a file in s3', async function () {
+ before('prepare environment', prepareEnvironment)
+ before('create a file in s3', async function () {
const buf = Buffer.from(fileId0.toString())
await backupPersistor.sendStream(
projectBlobsBucket,
@@ -1358,7 +1136,7 @@ describe('back_fill_file_hash script', function () {
)
})
let output
- beforeEach('run script', async function () {
+ before('run script', async function () {
output = await runScript([], {
CONCURRENCY: '1',
})
@@ -1368,47 +1146,6 @@ describe('back_fill_file_hash script', function () {
expect(output.stats).deep.equal(
sumStats(STATS_ALL, {
...STATS_ALL_ZERO,
- // one remote deduplicate
- deduplicatedWriteToAWSRemoteCount: 1,
- deduplicatedWriteToAWSRemoteEgress: 28,
- writeToAWSEgress: -28, // subtract skipped egress
- })
- )
- })
- commonAssertions()
- })
-
- describe('with something in the bucket and marked as processed', function () {
- beforeEach('create a file in s3', async function () {
- await backupPersistor.sendStream(
- projectBlobsBucket,
- makeProjectKey(historyId0, hashTextBlob0),
- Stream.Readable.from([contentTextBlob0]),
- { contentLength: contentTextBlob0.byteLength }
- )
- await backedUpBlobs.insertMany([
- {
- _id: projectId0,
- blobs: [binaryForGitBlobHash(hashTextBlob0)],
- },
- ])
- })
- let output
- beforeEach('run script', async function () {
- output = await runScript([], {
- CONCURRENCY: '1',
- })
- })
-
- it('should print stats', function () {
- expect(output.stats).deep.equal(
- sumStats(STATS_ALL, {
- ...STATS_ALL_ZERO,
- backedUpBlobs: 1,
- writeToAWSCount: -1,
- writeToAWSEgress: -27,
- readFromGCSCount: -1,
- readFromGCSIngress: -7,
})
)
})
@@ -1419,17 +1156,91 @@ describe('back_fill_file_hash script', function () {
// part0: project0+project1, part1: project2 onwards
const edge = projectId1.toString()
let outputPart0, outputPart1
- beforeEach('run script on part 0', async function () {
+ before('prepare environment', prepareEnvironment)
+ before('run script on part 0', async function () {
outputPart0 = await runScript([`--BATCH_RANGE_END=${edge}`], {
CONCURRENCY: '1',
})
})
- beforeEach('run script on part 1', async function () {
+ before('run script on part 1', async function () {
outputPart1 = await runScript([`--BATCH_RANGE_START=${edge}`], {
CONCURRENCY: '1',
})
})
+ it('should print stats for part 0', function () {
+ expect(outputPart0.stats).to.deep.equal(STATS_UP_TO_PROJECT1)
+ })
+ it('should print stats for part 1', function () {
+ expect(outputPart1.stats).to.deep.equal(STATS_UP_FROM_PROJECT1_ONWARD)
+ })
+ commonAssertions()
+ })
+
+ describe('projectIds from file', () => {
+ const path0 = '/tmp/project-ids-0.txt'
+ const path1 = '/tmp/project-ids-1.txt'
+ before('prepare environment', prepareEnvironment)
+ before('create project-ids.txt files', async function () {
+ await fs.promises.writeFile(
+ path0,
+ [projectId0, projectId1].map(id => id.toString()).join('\n')
+ )
+ await fs.promises.writeFile(
+ path1,
+ [
+ projectId2,
+ projectId3,
+ projectIdDeleted0,
+ projectIdDeleted1,
+ projectIdNoHistory,
+ projectIdNoHistoryDeleted,
+ projectIdHardDeleted,
+ projectIdNoOverleaf,
+ projectIdNoOverleafDeleted,
+ projectIdBadFileTree0,
+ projectIdBadFileTree1,
+ projectIdBadFileTree2,
+ projectIdBadFileTree3,
+ ]
+ .map(id => id.toString())
+ .join('\n')
+ )
+ })
+
+ let outputPart0, outputPart1
+ before('run script on part 0', async function () {
+ outputPart0 = await runScript([`--projectIdsFrom=${path0}`])
+ })
+ before('run script on part 1', async function () {
+ outputPart1 = await runScript([`--projectIdsFrom=${path1}`])
+ })
+
+ /**
+ * @param {string} msg
+ * @param {ObjectId} projectId
+ */
+ function expectLogEntry(msg, projectId) {
+ expect(outputPart1.result.stdout).to.include(msg)
+ const log = JSON.parse(
+ outputPart1.result.stdout
+ .split('\n')
+ .find(l => l.includes(`"${msg}"`) && l.includes(projectId.toString()))
+ )
+ expect(log).to.contain({
+ projectId: projectId.toString(),
+ msg,
+ })
+ }
+ it('should flag the hard-deleted project', function () {
+ expectLogEntry('project hard-deleted', projectIdHardDeleted)
+ })
+ it('should flag the projects without history id', function () {
+ expectLogEntry('project has no history id', projectIdNoOverleaf)
+ expectLogEntry('project has no history id', projectIdNoOverleafDeleted)
+ expectLogEntry('project has no history id', projectIdNoHistory)
+ expectLogEntry('project has no history id', projectIdNoHistoryDeleted)
+ })
it('should print stats', function () {
expect(outputPart0.stats).to.deep.equal(STATS_UP_TO_PROJECT1)
expect(outputPart1.stats).to.deep.equal(STATS_UP_FROM_PROJECT1_ONWARD)
diff --git a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash_fix_up.test.mjs b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash_fix_up.test.mjs
new file mode 100644
index 0000000000..3aa00d685a
--- /dev/null
+++ b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash_fix_up.test.mjs
@@ -0,0 +1,621 @@
+import fs from 'node:fs'
+import Crypto from 'node:crypto'
+import { promisify } from 'node:util'
+import { Binary, ObjectId } from 'mongodb'
+import { Blob } from 'overleaf-editor-core'
+import { db } from '../../../../storage/lib/mongodb.js'
+import cleanup from './support/cleanup.js'
+import testProjects from '../api/support/test_projects.js'
+import { execFile } from 'node:child_process'
+import chai, { expect } from 'chai'
+import chaiExclude from 'chai-exclude'
+import { BlobStore } from '../../../../storage/lib/blob_store/index.js'
+import { mockFilestore } from './support/MockFilestore.mjs'
+
+chai.use(chaiExclude)
+
+const TIMEOUT = 20 * 1_000
+
+const projectsCollection = db.collection('projects')
+const deletedProjectsCollection = db.collection('deletedProjects')
+
+/**
+ * @param {ObjectId} objectId
+ * @return {string}
+ */
+function gitBlobHash(objectId) {
+ return gitBlobHashBuffer(Buffer.from(objectId.toString()))
+}
+
+/**
+ * @param {Buffer} buf
+ * @return {string}
+ */
+function gitBlobHashBuffer(buf) {
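+  // Git blob ids are SHA-1 over the header "blob <byte length>\0" followed by
+  // the raw content, which the two update() calls below reproduce.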
+ const sha = Crypto.createHash('sha1')
+ sha.update(`blob ${buf.byteLength}\x00`)
+ sha.update(buf)
+ return sha.digest('hex')
+}
+
+/**
+ * @param {string} gitBlobHash
+ * @return {Binary}
+ */
+function binaryForGitBlobHash(gitBlobHash) {
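+  // Convert a hex git hash to the 20-byte BSON Binary representation stored in mongo.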
+ return new Binary(Buffer.from(gitBlobHash, 'hex'))
+}
+
+function objectIdFromTime(timestamp) {
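+  // createFromTime() encodes only the (seconds) timestamp and zeroes the rest
+  // of the ObjectId, so these test ids are deterministic across runs.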
+ return ObjectId.createFromTime(new Date(timestamp).getTime() / 1000)
+}
+
+const PRINT_IDS_AND_HASHES_FOR_DEBUGGING = false
+
+describe('back_fill_file_hash_fix_up script', function () {
+ this.timeout(TIMEOUT)
+ const USER_FILES_BUCKET_NAME = 'fake-user-files-gcs'
+
+ const projectId0 = objectIdFromTime('2017-01-01T00:00:00Z')
+ const projectIdDeleted0 = objectIdFromTime('2017-01-01T00:04:00Z')
+  const historyId0 = 42 // stored as a number in mongo
+ const historyIdDeleted0 = projectIdDeleted0.toString()
+ const fileIdWithDifferentHashFound = objectIdFromTime('2017-02-01T00:00:00Z')
+ const fileIdInGoodState = objectIdFromTime('2017-02-01T00:01:00Z')
+ const fileIdWithDifferentHashNotFound0 = objectIdFromTime(
+ '2017-02-01T00:03:00Z'
+ )
+ const fileIdWithDifferentHashNotFound1 = objectIdFromTime(
+ '2017-02-01T00:04:00Z'
+ )
+ const fileIdBlobExistsInGCSCorrupted = objectIdFromTime(
+ '2017-02-01T00:05:00Z'
+ )
+ const fileIdMissing0 = objectIdFromTime('2024-02-01T00:06:00Z')
+ const fileIdMissing1 = objectIdFromTime('2017-02-01T00:07:00Z')
+ const fileIdWithDifferentHashRestore = objectIdFromTime(
+ '2017-02-01T00:08:00Z'
+ )
+ const fileIdMissing2 = objectIdFromTime('2017-02-01T00:12:00Z')
+ const fileIdHashMissing0 = objectIdFromTime('2017-02-01T00:13:00Z')
+ const fileIdHashMissing1 = objectIdFromTime('2017-02-01T00:14:00Z')
+ const contentCorruptedBlob = 'string that produces another hash'
+ const contentDoesNotExistAsBlob = 'does not exist as blob'
+ const hashDoesNotExistAsBlob = gitBlobHashBuffer(
+ Buffer.from(contentDoesNotExistAsBlob)
+ )
+ const deleteProjectsRecordId0 = new ObjectId()
+ const writtenBlobs = [
+ {
+ projectId: projectId0,
+ historyId: historyId0,
+ fileId: fileIdWithDifferentHashNotFound0,
+ },
+ {
+ projectId: projectId0,
+ historyId: historyId0,
+ fileId: fileIdHashMissing0,
+ },
+ {
+ projectId: projectId0,
+ historyId: historyId0,
+ fileId: fileIdHashMissing1,
+ },
+ {
+ projectId: projectIdDeleted0,
+ historyId: historyIdDeleted0,
+ fileId: fileIdWithDifferentHashNotFound1,
+ },
+ ]
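+  // Simulated log lines (as written by back_fill_file_hash.mjs and
+  // find_malformed_filetrees.mjs) that the fix-up script consumes via --logs.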
+ const logs = [
+ {
+ projectId: projectId0,
+ fileId: fileIdWithDifferentHashFound,
+ err: { message: 'OError: hash mismatch' },
+ hash: gitBlobHash(fileIdMissing0), // does not matter
+ entry: {
+ ctx: { historyId: historyId0.toString() },
+ hash: gitBlobHash(fileIdInGoodState),
+ },
+ msg: 'failed to process file',
+ },
+ {
+ projectId: projectId0,
+ fileId: fileIdWithDifferentHashRestore,
+ err: { message: 'OError: hash mismatch' },
+ hash: hashDoesNotExistAsBlob,
+ entry: {
+ ctx: { historyId: historyId0.toString() },
+ hash: gitBlobHash(fileIdMissing0), // does not matter
+ },
+ msg: 'failed to process file',
+ },
+ {
+ projectId: projectId0,
+ fileId: fileIdWithDifferentHashNotFound0,
+ err: { message: 'OError: hash mismatch' },
+ hash: gitBlobHash(fileIdWithDifferentHashNotFound0),
+ entry: {
+ ctx: { historyId: historyId0.toString() },
+ hash: hashDoesNotExistAsBlob,
+ },
+ msg: 'failed to process file',
+ },
+ {
+ projectId: projectIdDeleted0,
+ fileId: fileIdWithDifferentHashNotFound1,
+ err: { message: 'OError: hash mismatch' },
+ hash: gitBlobHash(fileIdWithDifferentHashNotFound1),
+ entry: {
+ ctx: { historyId: historyIdDeleted0.toString() },
+ hash: hashDoesNotExistAsBlob,
+ },
+ msg: 'failed to process file',
+ },
+ {
+ projectId: projectId0,
+ fileId: fileIdMissing0,
+ bucketName: USER_FILES_BUCKET_NAME,
+ err: { message: 'NotFoundError' },
+ msg: 'failed to process file',
+ },
+ {
+ projectId: projectId0,
+ fileId: fileIdMissing2,
+ bucketName: USER_FILES_BUCKET_NAME,
+ err: { message: 'NotFoundError' },
+ msg: 'failed to process file',
+ },
+ {
+ projectId: projectIdDeleted0,
+ fileId: fileIdMissing1,
+ bucketName: USER_FILES_BUCKET_NAME,
+ err: { message: 'NotFoundError' },
+ msg: 'failed to process file',
+ },
+ {
+ err: { message: 'spurious error' },
+ msg: 'failed to process file, trying again',
+ },
+ {
+ err: { message: 'some other error' },
+ msg: 'failed to process file',
+ },
+ // from find_malformed_filetrees.mjs
+ {
+ projectId: projectId0,
+ _id: fileIdHashMissing0,
+ reason: 'bad file hash',
+ msg: 'bad file-tree path',
+ },
+ {
+ projectId: projectId0,
+ _id: fileIdHashMissing1,
+ reason: 'bad file hash',
+ msg: 'bad file-tree path',
+ },
+ {
+ projectId: projectId0,
+ _id: fileIdBlobExistsInGCSCorrupted,
+ reason: 'bad file hash',
+ msg: 'bad file-tree path',
+ },
+ ]
+ if (PRINT_IDS_AND_HASHES_FOR_DEBUGGING) {
+ const fileIds = {
+ fileIdWithDifferentHashFound,
+ fileIdInGoodState,
+ fileIdWithDifferentHashNotFound0,
+ fileIdWithDifferentHashNotFound1,
+ fileIdMissing0,
+ fileIdMissing1,
+ fileIdMissing2,
+ fileIdWithDifferentHashRestore,
+ fileIdHashMissing0,
+ fileIdHashMissing1,
+ }
+ console.log({
+ projectId0,
+ projectIdDeleted0,
+ historyId0,
+ historyIdDeleted0,
+ ...fileIds,
+ hashDoesNotExistAsBlob,
+ })
+ for (const [name, v] of Object.entries(fileIds)) {
+ console.log(
+ name,
+ gitBlobHash(v),
+ Array.from(binaryForGitBlobHash(gitBlobHash(v)).value())
+ )
+ }
+ }
+
+ before(cleanup.everything)
+
+ before('populate blobs/GCS', async function () {
+ await mockFilestore.start()
+ mockFilestore.addFile(
+ projectId0,
+ fileIdHashMissing0,
+ fileIdHashMissing0.toString()
+ )
+ mockFilestore.addFile(
+ projectId0,
+ fileIdHashMissing1,
+ fileIdHashMissing1.toString()
+ )
+ mockFilestore.addFile(
+ projectId0,
+ fileIdBlobExistsInGCSCorrupted,
+ fileIdBlobExistsInGCSCorrupted.toString()
+ )
+ await new BlobStore(historyId0.toString()).putString(
+ fileIdHashMissing1.toString() // partially processed
+ )
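+    // Register mismatching content under the expected hash to simulate a blob
+    // that exists in GCS but is corrupted.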
+ const path = '/tmp/test-blob-corrupted'
+ try {
+ await fs.promises.writeFile(path, contentCorruptedBlob)
+ await new BlobStore(historyId0.toString()).putBlob(
+ path,
+ new Blob(gitBlobHash(fileIdBlobExistsInGCSCorrupted), 42)
+ )
+ } finally {
+ await fs.promises.rm(path, { force: true })
+ }
+ await cleanup.postgres()
+ await cleanup.mongo()
+ await Promise.all([
+ testProjects.createEmptyProject(historyId0.toString()),
+ testProjects.createEmptyProject(historyIdDeleted0),
+ ])
+ await new BlobStore(historyId0.toString()).putString(
+ fileIdWithDifferentHashNotFound0.toString()
+ )
+ await new BlobStore(historyIdDeleted0.toString()).putString(
+ fileIdWithDifferentHashNotFound1.toString()
+ )
+ await new BlobStore(historyId0.toString()).putString(
+ fileIdInGoodState.toString()
+ )
+ })
+
+ before('populate mongo', async function () {
+ await projectsCollection.insertMany([
+ {
+ _id: projectId0,
+ rootFolder: [
+ {
+ fileRefs: [
+ { _id: fileIdMissing0 },
+ { _id: fileIdMissing0 }, // bad file-tree, duplicated fileRef.
+ { _id: fileIdMissing2 },
+ { _id: fileIdHashMissing0 },
+ { _id: fileIdHashMissing1 },
+ {
+ _id: fileIdWithDifferentHashFound,
+ hash: gitBlobHash(fileIdInGoodState),
+ },
+ {
+ _id: fileIdWithDifferentHashRestore,
+ hash: gitBlobHash(fileIdMissing0),
+ },
+ ],
+ folders: [
+ {
+ docs: [],
+ },
+ null,
+ {
+ fileRefs: [
+ null,
+ {
+ _id: fileIdInGoodState,
+ hash: gitBlobHash(fileIdInGoodState),
+ },
+ {
+ _id: fileIdWithDifferentHashNotFound0,
+ hash: hashDoesNotExistAsBlob,
+ },
+ {
+ _id: fileIdBlobExistsInGCSCorrupted,
+ hash: gitBlobHash(fileIdBlobExistsInGCSCorrupted),
+ },
+ ],
+ folders: [],
+ },
+ ],
+ },
+ ],
+ overleaf: { history: { id: historyId0 } },
+ version: 0,
+ },
+ ])
+ await deletedProjectsCollection.insertMany([
+ {
+ _id: deleteProjectsRecordId0,
+ project: {
+ _id: projectIdDeleted0,
+ rootFolder: [
+ {
+ fileRefs: [
+ {
+ _id: fileIdWithDifferentHashNotFound1,
+ hash: hashDoesNotExistAsBlob,
+ },
+ ],
+ folders: [
+ {
+ fileRefs: [],
+ folders: [
+ { fileRefs: [{ _id: fileIdMissing1 }], folders: [] },
+ ],
+ },
+ ],
+ },
+ ],
+ overleaf: { history: { id: historyIdDeleted0 } },
+ version: 100,
+ },
+ deleterData: {
+ deletedProjectId: projectIdDeleted0,
+ },
+ },
+ ])
+ })
+
+ /**
+ * @param {Array} args
+ * @param {Record} env
+ * @return {Promise<{ stdout: string, stderr: string, status: number }>}
+ */
+ async function tryRunScript(args = [], env = {}) {
+ let result
+ try {
+ result = await promisify(execFile)(
+ process.argv0,
+ ['storage/scripts/back_fill_file_hash_fix_up.mjs', ...args],
+ {
+ encoding: 'utf-8',
+ timeout: TIMEOUT - 500,
+ env: {
+ ...process.env,
+ USER_FILES_BUCKET_NAME,
+ SLEEP_BEFORE_EXIT: '1',
+ ...env,
+ LOG_LEVEL: 'warn', // Override LOG_LEVEL of acceptance tests
+ },
+ }
+ )
+ result.status = 0
+ } catch (err) {
+ const { stdout, stderr, code } = err
+ if (typeof code !== 'number') {
+ console.log(err)
+ }
+ result = { stdout, stderr, status: code }
+ }
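+    // The script is expected to clean up any temporary files it creates under /tmp.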
+ expect((await fs.promises.readdir('/tmp')).join(';')).to.not.match(
+ /back_fill_file_hash/
+ )
+ return result
+ }
+ async function runScriptWithLogs() {
+ const logsPath = '/tmp/test-script-logs'
+ let result
+ try {
+ await fs.promises.writeFile(
+ logsPath,
+ logs.map(e => JSON.stringify(e)).join('\n')
+ )
+ result = await tryRunScript([`--logs=${logsPath}`])
+ } finally {
+ await fs.promises.rm(logsPath, { force: true })
+ }
+ const stats = JSON.parse(result.stdout.trim().split('\n').pop())
+ return {
+ result,
+ stats,
+ }
+ }
+
+ let result, stats
+ before(async function () {
+ ;({ result, stats } = await runScriptWithLogs())
+ })
+ it('should print stats', function () {
+ expect(stats).to.contain({
+ processedLines: 12,
+ success: 7,
+ alreadyProcessed: 0,
+ fileDeleted: 0,
+ skipped: 0,
+ failed: 3,
+ unmatched: 1,
+ })
+ })
+ it('should handle re-run on same logs', async function () {
+ ;({ stats } = await runScriptWithLogs())
+ expect(stats).to.contain({
+ processedLines: 12,
+ success: 0,
+ alreadyProcessed: 4,
+ fileDeleted: 3,
+ skipped: 0,
+ failed: 3,
+ unmatched: 1,
+ })
+ })
+ it('should flag the unknown fatal error', function () {
+ const unknown = result.stdout
+ .split('\n')
+ .filter(l => l.includes('unknown fatal error'))
+ expect(unknown).to.have.length(1)
+ const [line] = unknown
+ expect(line).to.exist
+ expect(line).to.include('some other error')
+ })
+ it('should flag the unexpected blob on mismatched hash', function () {
+ const line = result.stdout
+ .split('\n')
+ .find(l => l.includes('found blob with computed filestore object hash'))
+ expect(line).to.exist
+ expect(line).to.include(projectId0.toString())
+ expect(line).to.include(fileIdWithDifferentHashFound.toString())
+ expect(line).to.include(gitBlobHash(fileIdInGoodState))
+ })
+ it('should flag the need to restore', function () {
+ const line = result.stdout
+ .split('\n')
+ .find(l => l.includes('missing blob, need to restore filestore file'))
+ expect(line).to.exist
+ expect(line).to.include(projectId0.toString())
+ expect(line).to.include(fileIdWithDifferentHashRestore.toString())
+ expect(line).to.include(hashDoesNotExistAsBlob)
+ })
+ it('should flag the corrupted blob', function () {
+ const line = result.stdout
+ .split('\n')
+ .find(l => l.includes('blob corrupted'))
+ expect(line).to.exist
+ expect(line).to.include(projectId0.toString())
+ expect(line).to.include(fileIdBlobExistsInGCSCorrupted.toString())
+ expect(line).to.include(
+ gitBlobHashBuffer(Buffer.from(contentCorruptedBlob))
+ )
+ expect(line).to.include(gitBlobHash(fileIdBlobExistsInGCSCorrupted))
+ })
+ it('should update mongo', async function () {
+ expect(await projectsCollection.find({}).toArray())
+ .excludingEvery([
+ 'currentEndTimestamp',
+ 'currentEndVersion',
+ 'updatedAt',
+ 'backup',
+ ])
+ .to.deep.equal([
+ {
+ _id: projectId0,
+ rootFolder: [
+ {
+ fileRefs: [
+ // Removed
+ // { _id: fileIdMissing0 },
+ // Removed
+ // { _id: fileIdMissing2 },
+ // Added hash
+ {
+ _id: fileIdHashMissing0,
+ hash: gitBlobHash(fileIdHashMissing0),
+ },
+ // Added hash
+ {
+ _id: fileIdHashMissing1,
+ hash: gitBlobHash(fileIdHashMissing1),
+ },
+            // No change, should warn about the blob that was found.
+ {
+ _id: fileIdWithDifferentHashFound,
+ hash: gitBlobHash(fileIdInGoodState),
+ },
+ // No change, should warn about the need to restore.
+ {
+ _id: fileIdWithDifferentHashRestore,
+ hash: gitBlobHash(fileIdMissing0),
+ },
+ ],
+ folders: [
+ {
+ docs: [],
+ },
+ null,
+ {
+ fileRefs: [
+ null,
+ // No change
+ {
+ _id: fileIdInGoodState,
+ hash: gitBlobHash(fileIdInGoodState),
+ },
+ // Updated hash
+ {
+ _id: fileIdWithDifferentHashNotFound0,
+ hash: gitBlobHash(fileIdWithDifferentHashNotFound0),
+ },
+ // No change, flagged
+ {
+ _id: fileIdBlobExistsInGCSCorrupted,
+ hash: gitBlobHash(fileIdBlobExistsInGCSCorrupted),
+ },
+ ],
+ folders: [],
+ },
+ ],
+ },
+ ],
+ overleaf: { history: { id: historyId0 } },
+ // Incremented when removing file/updating hash
+ version: 5,
+ },
+ ])
+ expect(await deletedProjectsCollection.find({}).toArray()).to.deep.equal([
+ {
+ _id: deleteProjectsRecordId0,
+ project: {
+ _id: projectIdDeleted0,
+ rootFolder: [
+ {
+ fileRefs: [
+ // Updated hash
+ {
+ _id: fileIdWithDifferentHashNotFound1,
+ hash: gitBlobHash(fileIdWithDifferentHashNotFound1),
+ },
+ ],
+ folders: [
+ {
+ fileRefs: [],
+ folders: [
+ {
+ fileRefs: [
+ // Removed
+ // { _id: fileIdMissing1 },
+ ],
+ folders: [],
+ },
+ ],
+ },
+ ],
+ },
+ ],
+ overleaf: { history: { id: historyIdDeleted0 } },
+ // Incremented when removing file/updating hash
+ version: 102,
+ },
+ deleterData: {
+ deletedProjectId: projectIdDeleted0,
+ },
+ },
+ ])
+ const writtenBlobsByProject = new Map()
+ for (const { projectId, fileId } of writtenBlobs) {
+ writtenBlobsByProject.set(
+ projectId,
+ (writtenBlobsByProject.get(projectId) || []).concat([fileId])
+ )
+ }
+ })
+ it('should have written the back filled files to history v1', async function () {
+ for (const { historyId, fileId } of writtenBlobs) {
+ const blobStore = new BlobStore(historyId.toString())
+ const hash = gitBlobHash(fileId.toString())
+ const blob = await blobStore.getBlob(hash)
+ expect(blob).to.exist
+ expect(blob.getByteLength()).to.equal(24)
+ const id = await blobStore.getString(hash)
+ expect(id).to.equal(fileId.toString())
+ // double check we are not comparing 'undefined' or '[object Object]' above
+ expect(id).to.match(/^[a-f0-9]{24}$/)
+ }
+ })
+})
diff --git a/services/history-v1/test/acceptance/js/storage/backup.test.mjs b/services/history-v1/test/acceptance/js/storage/backup.test.mjs
new file mode 100644
index 0000000000..fdca1ce294
--- /dev/null
+++ b/services/history-v1/test/acceptance/js/storage/backup.test.mjs
@@ -0,0 +1,682 @@
+import config from 'config'
+import { ObjectId } from 'mongodb'
+import { expect } from 'chai'
+import {
+ backedUpBlobs,
+ client,
+ globalBlobs,
+} from '../../../../storage/lib/mongodb.js'
+import persistor from '../../../../storage/lib/persistor.js'
+import {
+ loadGlobalBlobs,
+ BlobStore,
+ makeProjectKey,
+} from '../../../../storage/lib/blob_store/index.js'
+import { NotFoundError } from '@overleaf/object-persistor/src/Errors.js'
+import projectKey from '../../../../storage/lib/project_key.js'
+import { getBackupStatus } from '../../../../storage/lib/backup_store/index.js'
+import { text, buffer } from 'node:stream/consumers'
+import { createGunzip } from 'node:zlib'
+import { Change, Operation, File, TextOperation } from 'overleaf-editor-core'
+import ChunkStore from '../../../../storage/lib/chunk_store/index.js'
+import persistChanges from '../../../../storage/lib/persist_changes.js'
+import { historyStore } from '../../../../storage/lib/history_store.js'
+import { execFile } from 'node:child_process'
+import { promisify } from 'node:util'
+import testFiles from '../storage/support/test_files.js'
+import fs from 'node:fs'
+import {
+ backupBlob,
+ storeBlobBackup,
+} from '../../../../storage/lib/backupBlob.mjs'
+import {
+ backupPersistor,
+ projectBlobsBucket,
+ chunksBucket,
+} from '../../../../storage/lib/backupPersistor.mjs'
+import { Readable } from 'node:stream'
+
+const projectsCollection = client.db().collection('projects')
+
+/**
+ * @param {ObjectId} projectId
+ * @param {number} version
+ * @return {string}
+ */
+function makeChunkKey(projectId, version) {
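+  // Chunks live under the project's key prefix with the padded start version
+  // as the final path segment.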
+ return projectKey.format(projectId) + '/' + projectKey.pad(version)
+}
+
+describe('backup script', function () {
+ let project
+ let projectId, historyId
+ let limitsToPersistImmediately
+
+ before(function () {
+ // Used to provide a limit which forces us to persist all of the changes
+ const farFuture = new Date()
+ farFuture.setTime(farFuture.getTime() + 7 * 24 * 3600 * 1000)
+ limitsToPersistImmediately = {
+ minChangeTimestamp: farFuture,
+ maxChangeTimestamp: farFuture,
+ maxChanges: 10,
+ maxChunkChanges: 10,
+ }
+ })
+
+ beforeEach(async function () {
+ // Set up test projects with proper history metadata
+ projectId = new ObjectId()
+ historyId = projectId.toString()
+ project = {
+ _id: projectId,
+ overleaf: {
+ history: {
+ id: historyId,
+ currentEndVersion: 0, // Will be updated as changes are made
+ currentEndTimestamp: new Date(), // Will be updated as changes are made
+ },
+ backup: {
+ // Start with no backup state
+ },
+ },
+ }
+
+ // Pre-load the global blobs
+ await loadGlobalBlobs()
+
+ // Clean up any pre-existing test data
+ await projectsCollection.deleteMany({
+ _id: projectId,
+ })
+ await backedUpBlobs.deleteMany({}) // Clear any existing backedUpBlobs entries
+ })
+
+ describe('with simple project content', function () {
+ const contentString = 'hello world'
+ const newContentString = 'hello world more'
+ const graphPngPath = testFiles.path('graph.png')
+ const graphPngBuf = fs.readFileSync(graphPngPath)
+ const graphPngHash = testFiles.GRAPH_PNG_HASH
+ const nonBmpPath = testFiles.path('non_bmp.txt')
+ const DUMMY_HASH = '1111111111111111111111111111111111111111'
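+    // arbitrary hash used below to verify that unrelated backedUpBlobs entries are preserved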
+
+ beforeEach(async function () {
+ // Create initial project
+ await projectsCollection.insertOne(project)
+
+ // Initialize project in chunk store
+ await ChunkStore.initializeProject(historyId)
+
+ const blobStore = new BlobStore(historyId)
+
+ // Create the blobs and then back them up using backupBlob
+ const graphPngBlob = await blobStore.putFile(graphPngPath)
+ await backupBlob(historyId, graphPngBlob, graphPngPath)
+
+ // Add initial content using persistChanges
+ const file = File.fromString(contentString)
+ const addFileOp = Operation.addFile('main.tex', file)
+ const addGraphFileOp = Operation.addFile(
+ 'graph.png',
+ File.fromHash(testFiles.GRAPH_PNG_HASH)
+ )
+ const change1 = new Change([addFileOp, addGraphFileOp], new Date(), [])
+
+ await persistChanges(historyId, [change1], limitsToPersistImmediately, 0)
+
+ // Add a second change with a proper TextOperation
+ // For text operation: first number is how many chars to retain, then the text to insert
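+      // Here that is [11, ' more'], turning 'hello world' into 'hello world more'.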
+ const textOp = TextOperation.fromJSON({
+ textOperation: [contentString.length, ' more'], // Keep existing content, append ' more'
+ })
+ const editOp = Operation.editFile('main.tex', textOp)
+ const change2 = new Change([editOp], new Date(), [])
+
+ // store an unrelated hash in the backedUpBlobs collection,
+ // so we can test that only the backed up hashes are cleared.
+ await storeBlobBackup(historyId, DUMMY_HASH)
+
+ await persistChanges(historyId, [change2], limitsToPersistImmediately, 1)
+ })
+
+ it('should perform an initial backup', async function () {
+ // Run backup script for initial version
+ const { stdout } = await runBackupScript(['--projectId', projectId])
+ expect(stdout).to.not.include(
+ 'warning: persistor not passed to backupBlob'
+ )
+
+ // Verify backup state
+ const result = await getBackupStatus(projectId)
+ expect(result.backupStatus.lastBackedUpVersion).to.equal(2)
+ expect(result.backupStatus.lastBackedUpAt).to.be.an.instanceOf(Date)
+ expect(result.currentEndTimestamp).to.be.an.instanceOf(Date)
+ expect(result.backupStatus.pendingChangeAt).to.be.undefined
+
+ // Verify graph.png blob was backed up
+ const graphBlobStream = await backupPersistor.getObjectStream(
+ projectBlobsBucket,
+ makeProjectKey(historyId, graphPngHash),
+ { autoGunzip: true }
+ )
+ const graphBlobContent = await buffer(graphBlobStream)
+ expect(graphBlobContent.equals(graphPngBuf)).to.be.true
+
+ // Verify chunk was backed up
+ const chunkStream = await backupPersistor.getObjectStream(
+ chunksBucket,
+ makeChunkKey(historyId, 0)
+ )
+ const chunkContent = await text(chunkStream.pipe(createGunzip()))
+ const chunkMetadata = await ChunkStore.getLatestChunkMetadata(historyId)
+ const rawHistory = await historyStore.loadRaw(historyId, chunkMetadata.id)
+ expect(JSON.parse(chunkContent)).to.deep.equal(rawHistory)
+
+      // Unrelated entries from backedUpBlobs should not be cleared
+ const backedUpBlobsDoc = await backedUpBlobs.findOne({
+ _id: project._id,
+ })
+ expect(backedUpBlobsDoc).not.to.be.null
+ expect(backedUpBlobsDoc.blobs).to.have.length(1)
+ expect(backedUpBlobsDoc.blobs[0].toString('hex')).to.equal(DUMMY_HASH)
+ })
+
+ it('should perform an incremental backup', async function () {
+ // Backup first version
+ const { stdout: stdout1 } = await runBackupScript([
+ '--projectId',
+ projectId,
+ ])
+ expect(stdout1).to.not.include(
+ 'warning: persistor not passed to backupBlob'
+ )
+
+ // Verify first backup
+ const result1 = await getBackupStatus(projectId)
+ expect(result1.backupStatus.lastBackedUpVersion).to.equal(2)
+
+ // Persist additional changes
+ const additionalTextOp = TextOperation.fromJSON({
+ textOperation: [newContentString.length, ' even more'], // Keep existing content, append ' even more'
+ })
+ const additionalEditOp = Operation.editFile('main.tex', additionalTextOp)
+ const firstTimestamp = new Date()
+ const additionalChange = new Change(
+ [additionalEditOp],
+ firstTimestamp,
+ []
+ )
+
+ // add the nonbmp file
+ const blobStore = new BlobStore(historyId)
+ const nonBmpBlob = await blobStore.putFile(nonBmpPath)
+ await backupBlob(historyId, nonBmpBlob, nonBmpPath)
+
+ // Verify that the non-BMP file was backed up when the file was added
+ const newBackedUpBlobs = await backedUpBlobs.findOne({
+ _id: project._id,
+ })
+ expect(newBackedUpBlobs).not.to.be.null
+ expect(newBackedUpBlobs.blobs).to.have.length(2)
+ expect(
+ newBackedUpBlobs.blobs.map(b => b.toString('hex'))
+ ).to.have.members([testFiles.NON_BMP_TXT_HASH, DUMMY_HASH])
+
+ const addNonBmpFileOp = Operation.addFile(
+ 'non_bmp.txt',
+ File.fromHash(testFiles.NON_BMP_TXT_HASH)
+ )
+ const secondTimestamp = new Date()
+ const additionalChange2 = new Change(
+ [addNonBmpFileOp],
+ secondTimestamp,
+ []
+ )
+
+ await persistChanges(
+ historyId,
+ [additionalChange, additionalChange2],
+ limitsToPersistImmediately,
+ 2
+ )
+
+ const afterChangeResult = await getBackupStatus(projectId)
+ // Verify that the currentEndVersion and currentEndTimestamp are updated
+ expect(afterChangeResult.currentEndVersion).to.equal(4)
+ expect(afterChangeResult.currentEndTimestamp)
+ .to.be.an.instanceOf(Date)
+ .and.to.be.greaterThan(result1.currentEndTimestamp)
+ // Persisting a change should not modify the backup version and timestamp
+ expect(afterChangeResult.backupStatus.lastBackedUpVersion).to.equal(2)
+ expect(afterChangeResult.backupStatus.lastBackedUpAt)
+ .to.be.an.instanceOf(Date)
+ .and.to.deep.equal(result1.backupStatus.lastBackedUpAt)
+ // but it should update the pendingChangeAt timestamp to the timestamp of the
+ // first change which modified the project
+ expect(afterChangeResult.backupStatus.pendingChangeAt)
+ .to.be.an.instanceOf(Date)
+ .and.to.deep.equal(firstTimestamp)
+
+ // Second backup
+ const { stdout: stdout2 } = await runBackupScript([
+ '--projectId',
+ projectId,
+ ])
+ expect(stdout2).to.not.include(
+ 'warning: persistor not passed to backupBlob'
+ )
+
+ // Verify incremental backup
+ const result2 = await getBackupStatus(projectId)
+ // The backup version and timestamp should be updated
+ expect(result2.backupStatus.lastBackedUpVersion).to.equal(4)
+ expect(result2.backupStatus.lastBackedUpAt)
+ .to.be.an.instanceOf(Date)
+ .and.to.be.greaterThan(result1.backupStatus.lastBackedUpAt)
+ // The currentEndVersion and currentEndTimestamp should not be modified
+ expect(result2.currentEndVersion).to.equal(4)
+ expect(result2.currentEndTimestamp)
+ .to.be.an.instanceOf(Date)
+ .and.to.deep.equal(afterChangeResult.currentEndTimestamp)
+ // The pendingChangeAt timestamp should be cleared when the backup is complete
+ expect(result2.backupStatus.pendingChangeAt).to.be.undefined
+
+ // Verify additional blob was backed up
+ const newBlobStream = await backupPersistor.getObjectStream(
+ projectBlobsBucket,
+ makeProjectKey(historyId, testFiles.NON_BMP_TXT_HASH),
+ { autoGunzip: true }
+ )
+ const newBlobContent = await buffer(newBlobStream)
+ expect(newBlobContent).to.deep.equal(
+ fs.readFileSync(testFiles.path('non_bmp.txt'))
+ )
+
+ // Check chunk was backed up
+ const chunkStream = await backupPersistor.getObjectStream(
+ chunksBucket,
+ makeChunkKey(historyId, 0)
+ )
+ const chunkContent = await text(chunkStream.pipe(createGunzip()))
+ const chunkMetadata = await ChunkStore.getLatestChunkMetadata(historyId)
+ const rawHistory = await historyStore.loadRaw(historyId, chunkMetadata.id)
+ expect(JSON.parse(chunkContent)).to.deep.equal(rawHistory)
+
+      // Unrelated entries from backedUpBlobs should not be cleared
+ const backedUpBlobsDoc = await backedUpBlobs.findOne({
+ _id: project._id,
+ })
+ expect(backedUpBlobsDoc).not.to.be.null
+ expect(backedUpBlobsDoc.blobs).to.have.length(1)
+ expect(backedUpBlobsDoc.blobs[0].toString('hex')).to.equal(DUMMY_HASH)
+ })
+
+ it('should not backup global blobs', async function () {
+ const globalBlobString = 'a'
+ const globalBlobHash = testFiles.STRING_A_HASH
+ await globalBlobs.insertOne({
+ _id: globalBlobHash,
+ byteLength: globalBlobString.length,
+ stringLength: globalBlobString.length,
+ })
+ const bucket = config.get('blobStore.globalBucket')
+ for (const { key, content } of [
+ {
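+          // global blobs are keyed by their hash split as xx/yy/<rest of hash>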
+ key: '2e/65/efe2a145dda7ee51d1741299f848e5bf752e',
+ content: globalBlobString,
+ },
+ ]) {
+ const stream = Readable.from([content])
+ await persistor.sendStream(bucket, key, stream)
+ }
+ await loadGlobalBlobs()
+
+ // Create a change using the global blob
+ const addFileOp = Operation.addFile(
+ 'global.tex',
+ File.fromHash(globalBlobHash)
+ )
+ const change = new Change([addFileOp], new Date(), [])
+
+ await persistChanges(historyId, [change], limitsToPersistImmediately, 2)
+
+ // Run backup
+ await runBackupScript(['--projectId', projectId])
+
+ // Verify global blob wasn't backed up
+ try {
+ await backupPersistor.getObjectStream(
+ projectBlobsBucket,
+ makeProjectKey(historyId, globalBlobHash),
+ { autoGunzip: true }
+ )
+ expect.fail('Should not find global blob in project blobs')
+ } catch (err) {
+ expect(err).to.be.an.instanceOf(NotFoundError)
+ }
+ })
+
+ it('should back up global blobs if they are demoted', async function () {
+ const demotedBlobString = 'ab'
+ const demotedBlobHash = testFiles.STRING_AB_HASH
+ await globalBlobs.insertOne({
+ _id: demotedBlobHash,
+ byteLength: demotedBlobString.length,
+ stringLength: demotedBlobString.length,
+ demoted: true,
+ })
+ const bucket = config.get('blobStore.globalBucket')
+ for (const { key, content } of [
+ {
+ key: '9a/e9/e86b7bd6cb1472d9373702d8249973da0832',
+ content: demotedBlobString,
+ },
+ ]) {
+ const stream = Readable.from([content])
+ await persistor.sendStream(bucket, key, stream)
+ }
+ await loadGlobalBlobs()
+
+ // Create a change using the global blob
+ const addFileOp = Operation.addFile(
+ 'demoted.tex',
+ File.fromHash(demotedBlobHash)
+ )
+ const change = new Change([addFileOp], new Date(), [])
+
+ await persistChanges(historyId, [change], limitsToPersistImmediately, 2)
+
+ // Run backup
+ const { stdout } = await runBackupScript(['--projectId', projectId])
+ expect(stdout).to.not.include(
+ 'warning: persistor not passed to backupBlob'
+ )
+
+ // Check chunk was backed up
+ const chunkStream = await backupPersistor.getObjectStream(
+ chunksBucket,
+ makeChunkKey(historyId, 0)
+ )
+ const chunkContent = await text(chunkStream.pipe(createGunzip()))
+ const chunkMetadata = await ChunkStore.getLatestChunkMetadata(historyId)
+ const rawHistory = await historyStore.loadRaw(historyId, chunkMetadata.id)
+ expect(JSON.parse(chunkContent)).to.deep.equal(rawHistory)
+
+ // Verify that the demoted global blob was backed up
+ try {
+ const demotedBlobStream = await backupPersistor.getObjectStream(
+ projectBlobsBucket,
+ makeProjectKey(historyId, demotedBlobHash),
+ {
+ autoGunzip: true,
+ }
+ )
+ const demotedBlobContent = await buffer(demotedBlobStream)
+ expect(demotedBlobContent).to.deep.equal(Buffer.from(demotedBlobString))
+ } catch (err) {
+ expect.fail('Should find demoted global blob in project blobs')
+ }
+ })
+ })
+
+ describe('with complex project content', function () {
+ let beforeInitializationTimestamp
+ let afterInitializationTimestamp
+
+ beforeEach(async function () {
+ // Create initial project
+ await projectsCollection.insertOne(project)
+
+ // Initialize project in chunk store
+ // bracket the initialisation with two timestamps to check the pendingChangeAt field
+ beforeInitializationTimestamp = new Date()
+ await ChunkStore.initializeProject(historyId)
+ afterInitializationTimestamp = new Date()
+
+ const blobStore = new BlobStore(historyId)
+
+ // Set up test files with varying content
+ const testFilesData = {
+ mainTex: { name: 'main.tex', content: 'Initial content' },
+ chapter1: { name: 'chapter1.tex', content: 'Chapter 1 content' },
+ chapter2: { name: 'chapter2.tex', content: 'Chapter 2 content' },
+ bibliography: {
+ name: 'bibliography.bib',
+ content: '@article{key1,\n title={Title1}\n}',
+ newContent: '@article{key2,\n title={Title2}\n}',
+ },
+ graph: {
+ name: 'graph.png',
+ path: testFiles.path('graph.png'),
+ hash: testFiles.GRAPH_PNG_HASH,
+ },
+ unicodeFile: {
+ name: 'unicodeFile.tex',
+ path: testFiles.path('non_bmp.txt'),
+ hash: testFiles.NON_BMP_TXT_HASH,
+ },
+ }
+
+ const textFiles = [
+ testFilesData.mainTex,
+ testFilesData.chapter1,
+ testFilesData.chapter2,
+ testFilesData.bibliography,
+ ]
+ const binaryFiles = [testFilesData.graph, testFilesData.unicodeFile]
+
+ // Add binary files first
+ await Promise.all(binaryFiles.map(file => blobStore.putFile(file.path)))
+
+ // Back up the binary files
+ await Promise.all(
+ binaryFiles.map(async file => {
+ await backupBlob(
+ historyId,
+ await blobStore.putFile(file.path),
+ file.path
+ )
+ })
+ )
+
+ // Create operations to add all files initially
+ const addFileOperations = Object.values(testFilesData).map(file => {
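+        // Fixtures with a path were uploaded to the blob store above and are
+        // referenced by hash; plain-text fixtures are embedded from strings.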
+ if (file.path) {
+ return Operation.addFile(file.name, File.fromHash(file.hash))
+ }
+ return Operation.addFile(file.name, File.fromString(file.content))
+ })
+
+ // Initial change adding all files
+ const initialChange = new Change(addFileOperations, new Date(), [])
+ await persistChanges(
+ historyId,
+ [initialChange],
+ limitsToPersistImmediately,
+ 0
+ )
+
+ // Generate a series of edit operations for each text file
+ const editOperations = []
+ for (let i = 0; i < 50; i++) {
+ const targetFile = textFiles[i % textFiles.length]
+ if (!targetFile.path) {
+          // only edit text files; fixtures with a path (binary/unicode) are skipped
+ const appendText = `\n\nEdit ${i + 1}`
+ targetFile.content += appendText
+ const textOp = TextOperation.fromJSON({
+ textOperation: [
+ targetFile.content.length - appendText.length,
+ appendText,
+ ],
+ })
+ const editOp = Operation.editFile(targetFile.name, textOp)
+ editOperations.push(new Change([editOp], new Date(), []))
+ }
+ }
+
+ // Add a delete operation
+ const deleteChange = new Change(
+ [Operation.removeFile(testFilesData.bibliography.name)],
+ new Date(),
+ []
+ )
+ editOperations.push(deleteChange)
+
+ // Add the file back with different content
+ const addBackChange = new Change(
+ [
+ Operation.addFile(
+ testFilesData.bibliography.name,
+ File.fromString(testFilesData.bibliography.newContent)
+ ),
+ ],
+ new Date(),
+ []
+ )
+ editOperations.push(addBackChange)
+ // Persist all changes
+ await persistChanges(
+ historyId,
+ editOperations,
+ limitsToPersistImmediately,
+ 1
+ )
+ })
+
+ it('persistChanges should set the pendingChangeAt field to the time of snapshot initialisation', async function () {
+ const result = await getBackupStatus(projectId)
+ expect(result.backupStatus.pendingChangeAt).to.be.an.instanceOf(Date)
+ expect(result.backupStatus.pendingChangeAt)
+ .to.be.greaterThan(beforeInitializationTimestamp)
+ .and.to.be.lessThan(afterInitializationTimestamp)
+ })
+
+ it('should backup all chunks and blobs from a complex project history', async function () {
+ // Run backup script
+ const { stdout } = await runBackupScript(['--projectId', projectId])
+ expect(stdout).to.not.include(
+ 'warning: persistor not passed to backupBlob'
+ )
+
+ // Verify backup state
+ const result = await getBackupStatus(projectId)
+ expect(result.backupStatus.lastBackedUpVersion).to.equal(53) // 1 initial change + 50 edits + 1 delete + 1 add back
+ expect(result.backupStatus.lastBackedUpAt).to.be.an.instanceOf(Date)
+ expect(result.currentEndTimestamp).to.be.an.instanceOf(Date)
+ expect(result.backupStatus.pendingChangeAt).to.be.undefined
+
+ // Verify that binary files were backed up
+ for (const hash of [
+ testFiles.GRAPH_PNG_HASH,
+ testFiles.NON_BMP_TXT_HASH,
+ ]) {
+ const blobStream = await backupPersistor.getObjectStream(
+ projectBlobsBucket,
+ makeProjectKey(historyId, hash),
+ { autoGunzip: true }
+ )
+ expect(blobStream).to.exist
+ }
+
+ // Get all chunks and verify they were backed up
+ const listing = await backupPersistor
+ ._getClientForBucket(chunksBucket)
+ .listObjectsV2({
+ Bucket: chunksBucket,
+ Prefix: projectKey.format(historyId) + '/',
+ })
+ .promise()
+ const chunkKeys = listing.Contents.map(item => item.Key)
+ expect(chunkKeys.length).to.equal(6) // Should have multiple chunks
+
+ const localChunks = await ChunkStore.getProjectChunks(historyId)
+ const chunksByStartVersion = new Map()
+ for (const chunkRecord of localChunks) {
+ chunksByStartVersion.set(chunkRecord.startVersion, chunkRecord)
+ }
+
+ // Verify the content of each chunk matches what's in the history store
+ for (const chunkKey of chunkKeys) {
+ const chunkStream = await backupPersistor.getObjectStream(
+ chunksBucket,
+ chunkKey
+ )
+ const chunkContent = await text(chunkStream.pipe(createGunzip()))
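+        // The last path segment of a chunk key is its padded start version.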
+ const startVersion = parseInt(chunkKey.split('/').pop(), 10)
+ const chunk = chunksByStartVersion.get(startVersion)
+ const rawHistory = await historyStore.loadRaw(historyId, chunk.id)
+ expect(JSON.parse(chunkContent)).to.deep.equal(rawHistory)
+ }
+ })
+
+ it('should throw an error if downloading a blob fails', async function () {
+ const blobStore = new BlobStore(historyId)
+ const blob = await blobStore.putFile(
+ testFiles.path('null_characters.txt')
+ )
+ const change = new Change(
+ [Operation.addFile('broken-file', File.fromHash(blob.getHash()))],
+ new Date(),
+ []
+ )
+ // Persist all changes
+ await persistChanges(historyId, [change], limitsToPersistImmediately, 53)
+
+ // Delete the blob from the underlying storage to simulate a failure
+ const bucket = config.get('blobStore.projectBucket')
+ const key = makeProjectKey(historyId, blob.getHash())
+ await persistor.deleteObject(bucket, key)
+
+ // Run backup script - it should fail because the blob is missing
+ let result
+ try {
+ result = await runBackupScript(['--projectId', projectId])
+ expect.fail('Backup script should have failed')
+ } catch (err) {
+ expect(err).to.exist
+ expect(result).to.not.exist
+ }
+
+ // Verify that backup did not complete
+ const newBackupStatus = await getBackupStatus(projectId)
+ expect(newBackupStatus.backupStatus.lastBackedUpVersion).to.equal(50) // backup fails on final chunk
+ expect(newBackupStatus.currentEndVersion).to.equal(54) // backup is incomplete due to missing blob
+ })
+ })
+})
+
+/**
+ * Run the backup script with given arguments
+ * @param {string[]} args
+ */
+async function runBackupScript(args) {
+ const TIMEOUT = 20 * 1000
+ let result
+ try {
+ result = await promisify(execFile)(
+ 'node',
+ ['storage/scripts/backup.mjs', ...args],
+ {
+ encoding: 'utf-8',
+ timeout: TIMEOUT,
+ env: {
+ ...process.env,
+ LOG_LEVEL: 'debug', // Override LOG_LEVEL of acceptance tests
+ },
+ }
+ )
+ result.status = 0
+ } catch (err) {
+ const { stdout, stderr, code } = err
+ if (typeof code !== 'number') {
+ console.log(err)
+ }
+ result = { stdout, stderr, status: code }
+ }
+ if (result.status !== 0) {
+ throw new Error('backup failed')
+ }
+ return result
+}
diff --git a/services/history-v1/test/acceptance/js/storage/backupBlob.test.mjs b/services/history-v1/test/acceptance/js/storage/backupBlob.test.mjs
index 161acb7a55..73ead77a46 100644
--- a/services/history-v1/test/acceptance/js/storage/backupBlob.test.mjs
+++ b/services/history-v1/test/acceptance/js/storage/backupBlob.test.mjs
@@ -5,8 +5,14 @@ import {
makeBlobForFile,
getStringLengthOfFile,
makeProjectKey,
+ BlobStore,
} from '../../../../storage/lib/blob_store/index.js'
-import { backupBlob } from '../../../../storage/lib/backupBlob.mjs'
+import { Blob } from 'overleaf-editor-core'
+import { insertBlob } from '../../../../storage/lib/blob_store/mongo.js'
+import {
+ backupBlob,
+ downloadBlobToDir,
+} from '../../../../storage/lib/backupBlob.mjs'
import fs from 'node:fs'
import path from 'node:path'
import os from 'node:os'
@@ -18,6 +24,7 @@ import {
projectBlobsBucket,
} from '../../../../storage/lib/backupPersistor.mjs'
import { WritableBuffer } from '@overleaf/stream-utils'
+import cleanup from './support/cleanup.js'
async function listS3BucketRaw(bucket) {
const client = backupPersistor._getClientForBucket(bucket)
@@ -55,14 +62,7 @@ describe('backupBlob', function () {
}
})
- beforeEach(async function () {
- await backupPersistor.deleteDirectory(projectBlobsBucket, '')
- expect(await listS3Bucket(projectBlobsBucket)).to.have.length(0)
- })
-
- beforeEach('cleanup mongo', async function () {
- await backedUpBlobs.deleteMany({})
- })
+ beforeEach(cleanup.everything)
describe('when the blob is already backed up', function () {
let blob
@@ -157,6 +157,12 @@ describe('backupBlob', function () {
content: Buffer.from('x'.repeat(1000)),
storedSize: 29, // zlib.gzipSync(content).byteLength
},
+ {
+ name: 'large text file',
+        // 'ü' is a 2-byte utf-8 character -> 4MB.
+ content: Buffer.from('ü'.repeat(2 * 1024 * 1024)),
+ storedSize: 4101, // zlib.gzipSync(content).byteLength
+ },
{
name: 'binary file',
content: Buffer.from([0, 1, 2, 3]),
@@ -202,3 +208,71 @@ describe('backupBlob', function () {
})
}
})
+
+describe('downloadBlobToDir', function () {
+ let tmpDirDownload
+ const historyId = 'abc123def456abc789def123'
+
+ before(async function () {
+ tmpDirDownload = await fs.promises.mkdtemp(
+ path.join(os.tmpdir(), 'downloadBlobTest-')
+ )
+ })
+
+ after(async function () {
+ await fs.promises.rm(tmpDirDownload, { recursive: true, force: true })
+ })
+
+ it('should download the blob successfully', async function () {
+ const data = 'hello world'
+ // Use putString instead of writing a source file and using makeBlobForFile
+ const blobStore = new BlobStore(historyId)
+ const blob = await blobStore.putString(data)
+
+ // Now call downloadBlobToDir which will use blobStore.getStream internally
+ const downloadedFilePath = await downloadBlobToDir(
+ historyId,
+ blob,
+ tmpDirDownload
+ )
+ const contents = await fs.promises.readFile(downloadedFilePath, 'utf8')
+ expect(contents).to.equal(data)
+ })
+
+ it('should delete the file on error (if file already exists)', async function () {
+ const data = 'data that will not be written'
+ const blobStore = new BlobStore(historyId)
+ const blob = await blobStore.putString(data)
+ const hash = blob.getHash()
+ const fileName = `${historyId}-${hash}`
+
+ // Pre-create the destination file to trigger a failure due to an existing file
+ const downloadedFilePath = path.join(tmpDirDownload, fileName)
+ await fs.promises.writeFile(downloadedFilePath, 'preexisting content')
+
+ try {
+ await downloadBlobToDir(historyId, blob, tmpDirDownload)
+ expect.fail('should not reach here')
+ } catch (error) {
+ // Check that the file was deleted
+ await expect(fs.promises.access(downloadedFilePath)).to.be.rejected
+ }
+ })
+
+ it('should not leave an empty file if download fails', async function () {
+ // Create a blob with a hash that does not exist in the blob store
+ const hash = '0000000000000000000000000000000000000000'
+ const blob = new Blob(hash, 12, 12)
+ await insertBlob(historyId, blob)
+ const fileName = `${historyId}-${hash}`
+ try {
+ await downloadBlobToDir(historyId, blob, tmpDirDownload)
+ expect.fail('should not reach here')
+ } catch (error) {
+ expect(error).to.be.instanceOf(Blob.NotFoundError)
+ const downloadedFilePath = path.join(tmpDirDownload, fileName)
+ // Check that the file was deleted
+ await expect(fs.promises.access(downloadedFilePath)).to.be.rejected
+ }
+ })
+})
diff --git a/services/history-v1/test/acceptance/js/storage/backupPersistor.test.mjs b/services/history-v1/test/acceptance/js/storage/backupPersistor.test.mjs
new file mode 100644
index 0000000000..e9aedac77f
--- /dev/null
+++ b/services/history-v1/test/acceptance/js/storage/backupPersistor.test.mjs
@@ -0,0 +1,51 @@
+import {
+ pathToProjectFolder,
+ projectBlobsBucket,
+} from '../../../../storage/lib/backupPersistor.mjs'
+import { expect } from 'chai'
+
+describe('backupPersistor', function () {
+ describe('pathToProjectFolder', function () {
+ it('handles postgres and mongo-ids', function () {
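+ // Postgres ids produce numeric triplet folders; Mongo ids produce hex-segment folders.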
+ expect(pathToProjectFolder(projectBlobsBucket, '100/000/000')).to.equal(
+ '100/000/000/'
+ )
+ expect(pathToProjectFolder(projectBlobsBucket, '100/000/000/')).to.equal(
+ '100/000/000/'
+ )
+ expect(
+ pathToProjectFolder(projectBlobsBucket, '100/000/000/foo')
+ ).to.equal('100/000/000/')
+ expect(pathToProjectFolder(projectBlobsBucket, '210/000/000')).to.equal(
+ '210/000/000/'
+ )
+ expect(pathToProjectFolder(projectBlobsBucket, '987/654/321')).to.equal(
+ '987/654/321/'
+ )
+ expect(pathToProjectFolder(projectBlobsBucket, '987/654/3219')).to.equal(
+ '987/654/3219/'
+ )
+ expect(
+ pathToProjectFolder(projectBlobsBucket, 'fed/cba/987654321000000000')
+ ).to.equal('fed/cba/987654321000000000/')
+ expect(
+ pathToProjectFolder(projectBlobsBucket, 'fed/cba/987654321000000000/')
+ ).to.equal('fed/cba/987654321000000000/')
+ expect(
+ pathToProjectFolder(
+ projectBlobsBucket,
+ 'fed/cba/987654321000000000/foo'
+ )
+ ).to.equal('fed/cba/987654321000000000/')
+ })
+
+ it('rejects invalid input', function () {
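+ // None of these keys start with a valid project folder, so they must all be rejected.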
+ const cases = ['', '//', '1/2/3', '123/456/78', 'abc/d/e', 'abc/def/012']
+ for (const key of cases) {
+ expect(() => {
+ pathToProjectFolder(projectBlobsBucket, key)
+ }, key).to.throw('invalid project folder')
+ }
+ })
+ })
+})
diff --git a/services/history-v1/test/acceptance/js/storage/backup_generator.test.mjs b/services/history-v1/test/acceptance/js/storage/backup_generator.test.mjs
new file mode 100644
index 0000000000..3c0e8f1e67
--- /dev/null
+++ b/services/history-v1/test/acceptance/js/storage/backup_generator.test.mjs
@@ -0,0 +1,338 @@
+import { expect } from 'chai'
+import { backupGenerator } from '../../../../storage/lib/backupGenerator.mjs'
+import ChunkStore from '../../../../storage/lib/chunk_store/index.js'
+import persistChanges from '../../../../storage/lib/persist_changes.js'
+import {
+ Change,
+ Operation,
+ TextOperation,
+ AddFileOperation,
+ File,
+} from 'overleaf-editor-core'
+import { ObjectId } from 'mongodb'
+import testFiles from './support/test_files.js'
+import { BlobStore } from '../../../../storage/lib/blob_store/index.js'
+import fs from 'node:fs'
+import blobHash from '../../../../storage/lib/blob_hash.js'
+
+const scenarios = [
+ {
+ description: 'Postgres history',
+ createProject: ChunkStore.initializeProject,
+ },
+ {
+ description: 'Mongo history',
+ createProject: () =>
+ ChunkStore.initializeProject(new ObjectId().toString()),
+ },
+]
+
+for (const scenario of scenarios) {
+ describe(`backupGenerator with ${scenario.description}`, function () {
+ let projectId
+ let limitsToPersistImmediately
+ let blobStore
+ const NUM_CHUNKS = 3
+ const FINAL_VERSION = 24
+
+ before(function () {
+ // used to provide a limit which forces us to persist all of the changes
+ const farFuture = new Date()
+ farFuture.setTime(farFuture.getTime() + 7 * 24 * 3600 * 1000)
+ limitsToPersistImmediately = {
+ minChangeTimestamp: farFuture,
+ maxChangeTimestamp: farFuture,
+ maxChunkChanges: 10,
+ }
+ })
+
+ beforeEach(async function () {
+ projectId = await scenario.createProject()
+ blobStore = new BlobStore(projectId)
+
+ // Add test files first
+ await Promise.all([
+ blobStore.putFile(testFiles.path('graph.png')),
+ blobStore.putFile(testFiles.path('non_bmp.txt')),
+ ])
+
+ const HELLO_TXT = fs.readFileSync(testFiles.path('hello.txt')).toString()
+
+ // Create a sample project history for testing, with a chunk size of 10
+ //
+ // 1. Add a text file main.tex with contents from hello.txt
+ // 2. Add a binary file image.png with contents from graph.png
+ // 3. Add a text file other.tex with empty contents
+ // 4. Apply 10 changes that append characters to the end of other.tex giving 'aaaaaaaaaa'
+ // In applying the 10 changes we hit the first chunk boundary and create a new chunk.
+ // The first chunk contains the 3 file operations and 7 changes
+ // to other.tex which is now "aaaaaaa" (7 characters)
+ // snapshot: {}
+ // changes: add main.tex, add image.png, add other.tex, 7 changes to other.tex
+ // The second chunk has a snapshot with the existing files
+ // snapshot: main.tex, image.png, other.tex="aaaaaaa" (7 characters)
+ // changes: 3 changes to other.tex, each appending 'a'
+ // 5. Now we add a new file non_bmp.txt with non-BMP characters
+ // 6. Finally we apply 10 more changes to other.tex, each appending another 'a' to give 'aaaaaaaaaaaaaaaaaaaa' (20 characters)
+ // In applying the 10 changes we hit another chunk boundary and create a third chunk.
+ // The final state of the second chunk is
+ // snapshot: main.tex, image.png, other.tex="aaaaaaa" (7 characters)
+ // changes:
+ // 3 changes to other.tex, each appending 'a'
+ // add file non_bmp.txt,
+ // 6 changes to other.tex, each appending 'a'
+ // The third chunk will contain the last 4 changes to other.tex
+ // snapshot: main.tex, image.png, non_bmp.txt, other.tex="aaaaaaaaaaaaaaaa" (16 characters)
+ // changes: 4 changes to other.tex, each appending 'a'
+
+ const textChange = new Change(
+ [new AddFileOperation('main.tex', File.fromString(HELLO_TXT))],
+ new Date(),
+ []
+ )
+ const binaryChange = new Change(
+ [
+ new AddFileOperation(
+ 'image.png',
+ File.fromHash(testFiles.GRAPH_PNG_HASH)
+ ),
+ ],
+ new Date(),
+ []
+ )
+ const otherChange = new Change(
+ [new AddFileOperation('other.tex', File.fromString(''))],
+ new Date(),
+ []
+ )
+ // now append characters to the end of the contents of other.tex
+ const otherEdits = Array.from(
+ { length: 10 },
+ (_, i) =>
+ new Change(
+ [
+ Operation.editFile(
+ 'other.tex',
+ TextOperation.fromJSON({
+ textOperation: i === 0 ? ['a'] : [i, 'a'],
+ })
+ ),
+ ],
+ new Date(),
+ []
+ )
+ )
+ const newFile = new Change(
+ [
+ new AddFileOperation(
+ 'non_bmp.txt',
+ File.fromHash(testFiles.NON_BMP_TXT_HASH)
+ ),
+ ],
+ new Date(),
+ []
+ )
+ const moreOtherEdits = Array.from(
+ { length: 10 },
+ (_, i) =>
+ new Change(
+ [
+ Operation.editFile(
+ 'other.tex',
+ TextOperation.fromJSON({ textOperation: [i + 10, 'a'] })
+ ),
+ ],
+ new Date(),
+ []
+ )
+ )
+
+ await persistChanges(
+ projectId,
+ [
+ textChange,
+ binaryChange,
+ otherChange,
+ ...otherEdits,
+ newFile,
+ ...moreOtherEdits,
+ ],
+ limitsToPersistImmediately,
+ 0
+ )
+ })
+
+ it('should yield correct data for an initial backup', async function () {
+ const results = []
+ for await (const result of backupGenerator(projectId)) {
+ results.push(result)
+ }
+
+ // There should be 3 chunks
+ expect(results).to.have.length(NUM_CHUNKS)
+
+ // First chunk
+ expect(results[0].chunkRecord.startVersion).to.equal(0)
+ expect(results[0].chunkRecord.endVersion).to.equal(10)
+ expect(results[0].blobsToBackup).to.have.deep.members([
+ {
+ hash: testFiles.HELLO_TXT_HASH,
+ byteLength: testFiles.HELLO_TXT_BYTE_LENGTH,
+ stringLength: testFiles.HELLO_TXT_UTF8_LENGTH,
+ },
+ {
+ hash: testFiles.GRAPH_PNG_HASH,
+ byteLength: testFiles.GRAPH_PNG_BYTE_LENGTH,
+ stringLength: null,
+ },
+ {
+ hash: File.EMPTY_FILE_HASH,
+ byteLength: 0,
+ stringLength: 0,
+ },
+ ])
+
+ // Second chunk
+ expect(results[1].chunkRecord.startVersion).to.equal(10)
+ expect(results[1].chunkRecord.endVersion).to.equal(20)
+ expect(results[1].blobsToBackup).to.have.deep.members([
+ {
+ hash: blobHash.fromString('a'.repeat(7)),
+ byteLength: 7,
+ stringLength: 7,
+ },
+ {
+ hash: testFiles.NON_BMP_TXT_HASH,
+ byteLength: testFiles.NON_BMP_TXT_BYTE_LENGTH,
+ stringLength: null,
+ },
+ ])
+
+ // Third chunk
+ expect(results[2].chunkRecord.startVersion).to.equal(20)
+ expect(results[2].chunkRecord.endVersion).to.equal(24)
+ expect(results[2].blobsToBackup).to.have.deep.members([
+ {
+ hash: blobHash.fromString('a'.repeat(16)),
+ byteLength: 16,
+ stringLength: 16,
+ },
+ ])
+ })
+
+ for (
+ let lastBackedUpVersion = 0;
+ lastBackedUpVersion <= FINAL_VERSION;
+ lastBackedUpVersion++
+ ) {
+ it(`should yield the expected data when the last backed up version was ${lastBackedUpVersion}`, async function () {
+ const results = []
+ for await (const result of backupGenerator(
+ projectId,
+ lastBackedUpVersion
+ )) {
+ results.push(result)
+ }
+
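+ // Each expected blob is tagged with the earliest version whose backup requires it, so it can be filtered by lastBackedUpVersion below.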
+ const chunkDefinitions = [
+ {
+ chunk: { startVersion: 0, endVersion: 10 },
+ blobs: [
+ {
+ version: 1,
+ blob: {
+ hash: testFiles.HELLO_TXT_HASH,
+ byteLength: testFiles.HELLO_TXT_BYTE_LENGTH,
+ stringLength: testFiles.HELLO_TXT_UTF8_LENGTH,
+ },
+ },
+ {
+ version: 2,
+ blob: {
+ hash: testFiles.GRAPH_PNG_HASH,
+ byteLength: testFiles.GRAPH_PNG_BYTE_LENGTH,
+ stringLength: null,
+ },
+ },
+ {
+ version: 3,
+ blob: {
+ hash: File.EMPTY_FILE_HASH,
+ byteLength: 0,
+ stringLength: 0,
+ },
+ },
+ ],
+ },
+ {
+ chunk: { startVersion: 10, endVersion: 20 },
+ blobs: [
+ {
+ version: 11,
+ blob: {
+ hash: blobHash.fromString('a'.repeat(7)),
+ byteLength: 7,
+ stringLength: 7,
+ },
+ },
+ {
+ version: 14,
+ blob: {
+ hash: testFiles.NON_BMP_TXT_HASH,
+ byteLength: testFiles.NON_BMP_TXT_BYTE_LENGTH,
+ stringLength: null,
+ },
+ },
+ ],
+ },
+ {
+ chunk: { startVersion: 20, endVersion: 24 },
+ blobs: [
+ {
+ version: 21,
+ blob: {
+ hash: blobHash.fromString('a'.repeat(16)),
+ byteLength: 16,
+ stringLength: 16,
+ },
+ },
+ ],
+ },
+ ]
+
+ const expectedChunks = chunkDefinitions
+ .filter(({ chunk }) => lastBackedUpVersion < chunk.endVersion)
+ .map(({ chunk }) => chunk)
+ const expectedBlobs = chunkDefinitions
+ .filter(({ chunk }) => lastBackedUpVersion < chunk.endVersion)
+ .map(({ blobs }) =>
+ blobs
+ .filter(({ version }) => lastBackedUpVersion < version)
+ .map(({ blob }) => blob)
+ )
+
+ expect(results).to.have.length(expectedChunks.length)
+ expect(results).to.have.length(expectedBlobs.length)
+
+ results.forEach((result, i) => {
+ expect(result.chunkRecord).to.deep.include(expectedChunks[i])
+ expect(result.blobsToBackup).to.have.deep.members(expectedBlobs[i])
+ })
+ })
+ }
+
+ it(`should not back up blobs that have already been backed up in previous chunks`, async function () {
+ const results = []
+ for await (const result of backupGenerator(projectId)) {
+ results.push(result)
+ }
+ const seenBlobs = new Set()
+ for (const result of results) {
+ for (const blob of result.blobsToBackup) {
+ expect(seenBlobs).to.not.include(blob.hash)
+ seenBlobs.add(blob.hash)
+ }
+ }
+ })
+ })
+}
diff --git a/services/history-v1/test/acceptance/js/storage/blob_store_postgres.test.js b/services/history-v1/test/acceptance/js/storage/blob_store_postgres.test.js
index 0add4fa901..e762c33569 100644
--- a/services/history-v1/test/acceptance/js/storage/blob_store_postgres.test.js
+++ b/services/history-v1/test/acceptance/js/storage/blob_store_postgres.test.js
@@ -8,20 +8,20 @@ describe('BlobStore postgres backend', function () {
const projectId = new ObjectId().toString()
await expect(
postgresBackend.insertBlob(projectId, 'hash', 123, 99)
- ).to.be.rejectedWith(`bad projectId ${projectId}`)
+ ).to.be.rejectedWith('bad projectId')
})
it('deleteBlobs rejects when called with bad projectId', async function () {
const projectId = new ObjectId().toString()
await expect(postgresBackend.deleteBlobs(projectId)).to.be.rejectedWith(
- `bad projectId ${projectId}`
+ 'bad projectId'
)
})
it('findBlobs rejects when called with bad projectId', async function () {
const projectId = new ObjectId().toString()
await expect(postgresBackend.findBlobs(projectId)).to.be.rejectedWith(
- `bad projectId ${projectId}`
+ 'bad projectId'
)
})
@@ -29,14 +29,14 @@ describe('BlobStore postgres backend', function () {
const projectId = new ObjectId().toString()
await expect(
postgresBackend.findBlob(projectId, 'hash')
- ).to.be.rejectedWith(`bad projectId ${projectId}`)
+ ).to.be.rejectedWith('bad projectId')
})
it('getProjectBlobs rejects when called with bad projectId', async function () {
const projectId = new ObjectId().toString()
await expect(
postgresBackend.getProjectBlobs(projectId)
- ).to.be.rejectedWith(`bad projectId ${projectId}`)
+ ).to.be.rejectedWith('bad projectId')
})
})
})
diff --git a/services/history-v1/test/acceptance/js/storage/chunk_store.test.js b/services/history-v1/test/acceptance/js/storage/chunk_store.test.js
index 54d01548b8..df1e36e9cd 100644
--- a/services/history-v1/test/acceptance/js/storage/chunk_store.test.js
+++ b/services/history-v1/test/acceptance/js/storage/chunk_store.test.js
@@ -5,6 +5,11 @@ const fixtures = require('./support/fixtures')
const { expect } = require('chai')
const sinon = require('sinon')
const { ObjectId } = require('mongodb')
+const { projects } = require('../../../../storage/lib/mongodb')
+const {
+ ChunkVersionConflictError,
+ VersionNotFoundError,
+} = require('../../../../storage/lib/chunk_store/errors')
const {
Chunk,
@@ -17,7 +22,8 @@ const {
EditFileOperation,
TextOperation,
} = require('overleaf-editor-core')
-const { chunkStore, historyStore } = require('../../../../storage')
+const { chunkStore, historyStore, BlobStore } = require('../../../../storage')
+const redisBackend = require('../../../../storage/lib/chunk_store/redis')
describe('chunkStore', function () {
beforeEach(cleanup.everything)
@@ -27,20 +33,29 @@ describe('chunkStore', function () {
{
description: 'Postgres backend',
createProject: chunkStore.initializeProject,
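+ // idMapping converts the history id into the form stored on the project record (integer for Postgres, ObjectId string for Mongo).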
+ idMapping: id => parseInt(id, 10),
},
{
description: 'Mongo backend',
createProject: () =>
chunkStore.initializeProject(new ObjectId().toString()),
+ idMapping: id => id,
},
]
for (const scenario of scenarios) {
describe(scenario.description, function () {
let projectId
+ let projectRecord
+ let blobStore
beforeEach(async function () {
projectId = await scenario.createProject()
+ // create a record in the mongo projects collection
+ projectRecord = await projects.insertOne({
+ overleaf: { history: { id: scenario.idMapping(projectId) } },
+ })
+ blobStore = new BlobStore(projectId)
})
it('loads empty latest chunk for a new project', async function () {
@@ -50,23 +65,94 @@ describe('chunkStore', function () {
expect(chunk.getEndTimestamp()).not.to.exist
})
+ describe('creating a chunk', async function () {
+ const pendingChangeTimestamp = new Date('2014-01-01T00:00:00')
+ const lastChangeTimestamp = new Date('2015-01-01T00:00:00')
+ beforeEach(async function () {
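+ // Extend the initial chunk via update(), then add a second chunk via create(), passing the pending change timestamp both times.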
+ const blob = await blobStore.putString('abc')
+ const firstChunk = makeChunk(
+ [
+ makeChange(
+ Operation.addFile('main.tex', File.createLazyFromBlobs(blob)),
+ lastChangeTimestamp
+ ),
+ ],
+ 0
+ )
+ await chunkStore.update(projectId, firstChunk, pendingChangeTimestamp)
+
+ const secondChunk = makeChunk(
+ [
+ makeChange(
+ Operation.addFile('other.tex', File.createLazyFromBlobs(blob)),
+ lastChangeTimestamp
+ ),
+ ],
+ 1
+ )
+ await chunkStore.create(
+ projectId,
+ secondChunk,
+ pendingChangeTimestamp
+ )
+ })
+
+ it('creates a chunk and inserts the pending change timestamp', async function () {
+ const project = await projects.findOne({
+ _id: new ObjectId(projectRecord.insertedId),
+ })
+ expect(project.overleaf.history.currentEndVersion).to.equal(2)
+ expect(project.overleaf.history.currentEndTimestamp).to.deep.equal(
+ lastChangeTimestamp
+ )
+ expect(project.overleaf.backup.pendingChangeAt).to.deep.equal(
+ pendingChangeTimestamp
+ )
+ })
+ })
+
describe('adding and editing a blank file', function () {
const testPathname = 'foo.txt'
const testTextOperation = TextOperation.fromJSON({
textOperation: ['a'],
}) // insert an a
let lastChangeTimestamp
+ const pendingChangeTimestamp = new Date()
beforeEach(async function () {
const chunk = await chunkStore.loadLatest(projectId)
- const oldEndVersion = chunk.getEndVersion()
+ const blob = await blobStore.putString('')
const changes = [
- makeChange(Operation.addFile(testPathname, File.fromString(''))),
+ makeChange(
+ Operation.addFile(testPathname, File.createLazyFromBlobs(blob))
+ ),
makeChange(Operation.editFile(testPathname, testTextOperation)),
]
lastChangeTimestamp = changes[1].getTimestamp()
chunk.pushChanges(changes)
- await chunkStore.update(projectId, oldEndVersion, chunk)
+ await chunkStore.update(projectId, chunk, pendingChangeTimestamp)
+ })
+
+ it('records the correct metadata in db readOnly=false', async function () {
+ const chunkMetadata =
+ await chunkStore.getLatestChunkMetadata(projectId)
+ expect(chunkMetadata).to.deep.include({
+ startVersion: 0,
+ endVersion: 2,
+ endTimestamp: lastChangeTimestamp,
+ })
+ })
+
+ it('records the correct metadata in db readOnly=true', async function () {
+ const chunkMetadata = await chunkStore.getLatestChunkMetadata(
+ projectId,
+ { readOnly: true }
+ )
+ expect(chunkMetadata).to.deep.include({
+ startVersion: 0,
+ endVersion: 2,
+ endTimestamp: lastChangeTimestamp,
+ })
})
it('records the correct timestamp', async function () {
@@ -94,40 +180,55 @@ describe('chunkStore', function () {
expect(editFile).to.be.an.instanceof(EditFileOperation)
expect(editFile.getPathname()).to.equal(testPathname)
})
+
+ it('updates the project record with the current version and timestamps', async function () {
+ const project = await projects.findOne({
+ _id: new ObjectId(projectRecord.insertedId),
+ })
+ expect(project.overleaf.history.currentEndVersion).to.equal(2)
+ expect(project.overleaf.history.currentEndTimestamp).to.deep.equal(
+ lastChangeTimestamp
+ )
+ expect(project.overleaf.backup.pendingChangeAt).to.deep.equal(
+ pendingChangeTimestamp
+ )
+ })
})
describe('multiple chunks', async function () {
// Two chunks are 1 year apart
+ const pendingChangeTimestamp = new Date('2014-01-01T00:00:00')
const firstChunkTimestamp = new Date('2015-01-01T00:00:00')
const secondChunkTimestamp = new Date('2016-01-01T00:00:00')
const thirdChunkTimestamp = new Date('2017-01-01T00:00:00')
let firstChunk, secondChunk, thirdChunk
beforeEach(async function () {
+ const blob = await blobStore.putString('')
firstChunk = makeChunk(
[
makeChange(
- Operation.addFile('foo.tex', File.fromString('')),
+ Operation.addFile('foo.tex', File.createLazyFromBlobs(blob)),
new Date(firstChunkTimestamp - 5000)
),
makeChange(
- Operation.addFile('bar.tex', File.fromString('')),
+ Operation.addFile('bar.tex', File.createLazyFromBlobs(blob)),
firstChunkTimestamp
),
],
0
)
- await chunkStore.update(projectId, 0, firstChunk)
+ await chunkStore.update(projectId, firstChunk, pendingChangeTimestamp)
firstChunk = await chunkStore.loadLatest(projectId)
secondChunk = makeChunk(
[
makeChange(
- Operation.addFile('baz.tex', File.fromString('')),
+ Operation.addFile('baz.tex', File.createLazyFromBlobs(blob)),
new Date(secondChunkTimestamp - 5000)
),
makeChange(
- Operation.addFile('qux.tex', File.fromString('')),
+ Operation.addFile('qux.tex', File.createLazyFromBlobs(blob)),
secondChunkTimestamp
),
],
@@ -139,7 +240,11 @@ describe('chunkStore', function () {
thirdChunk = makeChunk(
[
makeChange(
- Operation.addFile('quux.tex', File.fromString('')),
+ Operation.addFile('quux.tex', File.createLazyFromBlobs(blob)),
+ thirdChunkTimestamp
+ ),
+ makeChange(
+ Operation.addFile('barbar.tex', File.createLazyFromBlobs(blob)),
thirdChunkTimestamp
),
],
@@ -212,26 +317,123 @@ describe('chunkStore', function () {
expect(chunk).to.deep.equal(thirdChunk)
})
- describe('after updating the last chunk', function () {
- let newChunk
+ it('updates the project record to match the last chunk', async function () {
+ const project = await projects.findOne({
+ _id: new ObjectId(projectRecord.insertedId),
+ })
+ expect(project.overleaf.history.currentEndVersion).to.equal(6)
+ expect(project.overleaf.history.currentEndTimestamp).to.deep.equal(
+ thirdChunkTimestamp
+ )
+ })
- beforeEach(async function () {
- newChunk = makeChunk(
+ it('updates the pending change timestamp to match the first chunk', async function () {
+ const project = await projects.findOne({
+ _id: new ObjectId(projectRecord.insertedId),
+ })
+ expect(project.overleaf.backup.pendingChangeAt).to.deep.equal(
+ pendingChangeTimestamp
+ )
+ })
+
+ describe('chunk update', function () {
+ it('rejects a chunk that removes changes', async function () {
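+ // The replacement chunk keeps only one of the third chunk's two changes, which would drop history.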
+ const newChunk = makeChunk([thirdChunk.getChanges()[0]], 4)
+ await expect(
+ chunkStore.update(projectId, newChunk)
+ ).to.be.rejectedWith(ChunkVersionConflictError)
+ const latestChunk = await chunkStore.loadLatest(projectId)
+ expect(latestChunk.toRaw()).to.deep.equal(thirdChunk.toRaw())
+ })
+
+ it('accepts the same chunk', async function () {
+ await chunkStore.update(projectId, thirdChunk)
+ const latestChunk = await chunkStore.loadLatest(projectId)
+ expect(latestChunk.toRaw()).to.deep.equal(thirdChunk.toRaw())
+ })
+
+ it('accepts a larger chunk', async function () {
+ const blob = await blobStore.putString('foobar')
+ const newChunk = makeChunk(
[
...thirdChunk.getChanges(),
makeChange(
- Operation.addFile('onemore.tex', File.fromString('')),
+ Operation.addFile(
+ 'onemore.tex',
+ File.createLazyFromBlobs(blob)
+ ),
thirdChunkTimestamp
),
],
4
)
- await chunkStore.update(projectId, 5, newChunk)
+ await chunkStore.update(projectId, newChunk)
+ const latestChunk = await chunkStore.loadLatest(projectId)
+ expect(latestChunk.toRaw()).to.deep.equal(newChunk.toRaw())
+ })
+ })
+
+ describe('chunk create', function () {
+ let change
+
+ beforeEach(async function () {
+ const blob = await blobStore.putString('foobar')
+ change = makeChange(
+ Operation.addFile('onemore.tex', File.createLazyFromBlobs(blob)),
+ thirdChunkTimestamp
+ )
+ })
+
+ it('rejects a base version that is too low', async function () {
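+ // The latest chunk ends at version 6, so a new chunk starting at 5 would overlap it.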
+ const newChunk = makeChunk([change], 5)
+ await expect(
+ chunkStore.create(projectId, newChunk)
+ ).to.be.rejectedWith(ChunkVersionConflictError)
+ const latestChunk = await chunkStore.loadLatest(projectId)
+ expect(latestChunk.toRaw()).to.deep.equal(thirdChunk.toRaw())
+ })
+
+ it('rejects a base version that is too high', async function () {
+ const newChunk = makeChunk([change], 7)
+ await expect(
+ chunkStore.create(projectId, newChunk)
+ ).to.be.rejectedWith(VersionNotFoundError)
+ const latestChunk = await chunkStore.loadLatest(projectId)
+ expect(latestChunk.toRaw()).to.deep.equal(thirdChunk.toRaw())
+ })
+
+ it('accepts the right base version', async function () {
+ const newChunk = makeChunk([change], 6)
+ await chunkStore.create(projectId, newChunk)
+ const latestChunk = await chunkStore.loadLatest(projectId)
+ expect(latestChunk.toRaw()).to.deep.equal(newChunk.toRaw())
+ })
+ })
+
+ describe('after updating the last chunk', function () {
+ let newChunk
+
+ beforeEach(async function () {
+ const blob = await blobStore.putString('')
+ newChunk = makeChunk(
+ [
+ ...thirdChunk.getChanges(),
+ makeChange(
+ Operation.addFile(
+ 'onemore.tex',
+ File.createLazyFromBlobs(blob)
+ ),
+ thirdChunkTimestamp
+ ),
+ ],
+ 4
+ )
+ await chunkStore.update(projectId, newChunk)
newChunk = await chunkStore.loadLatest(projectId)
})
it('replaces the latest chunk', function () {
- expect(newChunk.getChanges()).to.have.length(2)
+ expect(newChunk.getChanges()).to.have.length(3)
})
it('returns the right chunk when querying by version', async function () {
@@ -246,6 +448,260 @@ describe('chunkStore', function () {
)
expect(chunk).to.deep.equal(newChunk)
})
+
+ it('updates the project record to match the latest version and timestamp', async function () {
+ const project = await projects.findOne({
+ _id: new ObjectId(projectRecord.insertedId),
+ })
+ expect(project.overleaf.history.currentEndVersion).to.equal(7)
+ expect(project.overleaf.history.currentEndTimestamp).to.deep.equal(
+ thirdChunkTimestamp
+ )
+ })
+
+ it('does not modify the existing pending change timestamp in the project record', async function () {
+ const project = await projects.findOne({
+ _id: new ObjectId(projectRecord.insertedId),
+ })
+ expect(project.overleaf.backup.pendingChangeAt).to.deep.equal(
+ pendingChangeTimestamp
+ )
+ })
+ })
+
+ describe('with changes queued in the Redis buffer', function () {
+ let queuedChanges
+ const firstQueuedChangeTimestamp = new Date('2017-01-01T00:01:00')
+ const lastQueuedChangeTimestamp = new Date('2017-01-01T00:02:00')
+
+ beforeEach(async function () {
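+ // Build the head snapshot by applying the third chunk's changes, then queue two more changes on top of it in Redis.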
+ const snapshot = thirdChunk.getSnapshot()
+ snapshot.applyAll(thirdChunk.getChanges())
+ const blob = await blobStore.putString('zzz')
+ queuedChanges = [
+ makeChange(
+ Operation.addFile(
+ 'in-redis.tex',
+ File.createLazyFromBlobs(blob)
+ ),
+ firstQueuedChangeTimestamp
+ ),
+ makeChange(
+ // Add a second change to make the buffer more interesting
+ Operation.editFile(
+ 'in-redis.tex',
+ TextOperation.fromJSON({ textOperation: ['hello'] })
+ ),
+ lastQueuedChangeTimestamp
+ ),
+ ]
+ await redisBackend.queueChanges(
+ projectId,
+ snapshot,
+ thirdChunk.getEndVersion(),
+ queuedChanges
+ )
+ })
+
+ it('includes the queued changes when getting the latest chunk', async function () {
+ const chunk = await chunkStore.loadLatest(projectId)
+ const expectedChanges = thirdChunk
+ .getChanges()
+ .concat(queuedChanges)
+ expect(chunk.getChanges()).to.deep.equal(expectedChanges)
+ expect(chunk.getStartVersion()).to.equal(
+ thirdChunk.getStartVersion()
+ )
+ expect(chunk.getEndVersion()).to.equal(
+ thirdChunk.getEndVersion() + queuedChanges.length
+ )
+ expect(chunk.getEndTimestamp()).to.deep.equal(
+ lastQueuedChangeTimestamp
+ )
+ })
+
+ it('includes the queued changes when getting the latest chunk by timestamp', async function () {
+ const chunk = await chunkStore.loadAtTimestamp(
+ projectId,
+ thirdChunkTimestamp
+ )
+ const expectedChanges = thirdChunk
+ .getChanges()
+ .concat(queuedChanges)
+ expect(chunk.getChanges()).to.deep.equal(expectedChanges)
+ expect(chunk.getStartVersion()).to.equal(
+ thirdChunk.getStartVersion()
+ )
+ expect(chunk.getEndVersion()).to.equal(
+ thirdChunk.getEndVersion() + queuedChanges.length
+ )
+ })
+
+ it("doesn't include the queued changes when getting another chunk by timestamp", async function () {
+ const chunk = await chunkStore.loadAtTimestamp(
+ projectId,
+ secondChunkTimestamp
+ )
+ const expectedChanges = secondChunk.getChanges()
+ expect(chunk.getChanges()).to.deep.equal(expectedChanges)
+ expect(chunk.getStartVersion()).to.equal(
+ secondChunk.getStartVersion()
+ )
+ expect(chunk.getEndVersion()).to.equal(secondChunk.getEndVersion())
+ expect(chunk.getEndTimestamp()).to.deep.equal(secondChunkTimestamp)
+ })
+
+ it('includes the queued changes when getting the latest chunk by version', async function () {
+ const chunk = await chunkStore.loadAtVersion(
+ projectId,
+ thirdChunk.getEndVersion()
+ )
+ const expectedChanges = thirdChunk
+ .getChanges()
+ .concat(queuedChanges)
+ expect(chunk.getChanges()).to.deep.equal(expectedChanges)
+ expect(chunk.getStartVersion()).to.equal(
+ thirdChunk.getStartVersion()
+ )
+ expect(chunk.getEndVersion()).to.equal(
+ thirdChunk.getEndVersion() + queuedChanges.length
+ )
+ expect(chunk.getEndTimestamp()).to.deep.equal(
+ lastQueuedChangeTimestamp
+ )
+ })
+
+ it("doesn't include the queued changes when getting another chunk by version", async function () {
+ const chunk = await chunkStore.loadAtVersion(
+ projectId,
+ secondChunk.getEndVersion()
+ )
+ const expectedChanges = secondChunk.getChanges()
+ expect(chunk.getChanges()).to.deep.equal(expectedChanges)
+ expect(chunk.getStartVersion()).to.equal(
+ secondChunk.getStartVersion()
+ )
+ expect(chunk.getEndVersion()).to.equal(secondChunk.getEndVersion())
+ expect(chunk.getEndTimestamp()).to.deep.equal(secondChunkTimestamp)
+ })
+
+ it('loads a version that is only in the Redis buffer', async function () {
+ const versionInRedis = thirdChunk.getEndVersion() + 1 // the first change in Redis
+ const chunk = await chunkStore.loadAtVersion(
+ projectId,
+ versionInRedis
+ )
+ // The chunk should contain changes from the thirdChunk and the queuedChanges
+ const expectedChanges = thirdChunk
+ .getChanges()
+ .concat(queuedChanges)
+ expect(chunk.getChanges()).to.deep.equal(expectedChanges)
+ expect(chunk.getStartVersion()).to.equal(
+ thirdChunk.getStartVersion()
+ )
+ expect(chunk.getEndVersion()).to.equal(
+ thirdChunk.getEndVersion() + queuedChanges.length
+ )
+ expect(chunk.getEndTimestamp()).to.deep.equal(
+ lastQueuedChangeTimestamp
+ )
+ })
+
+ it('throws an error when loading a version beyond the Redis buffer', async function () {
+ const versionBeyondRedis =
+ thirdChunk.getEndVersion() + queuedChanges.length + 1
+ await expect(
+ chunkStore.loadAtVersion(projectId, versionBeyondRedis)
+ )
+ .to.be.rejectedWith(chunkStore.VersionOutOfBoundsError)
+ .and.eventually.satisfy(err => {
+ expect(err.info).to.have.property('projectId', projectId)
+ expect(err.info).to.have.property('version', versionBeyondRedis)
+ return true
+ })
+ })
+ })
+
+ describe('when iterating the chunks with getProjectChunksFromVersion', function () {
+ // The first chunk has startVersion:0 and endVersion:2
+ for (let startVersion = 0; startVersion <= 2; startVersion++) {
+ it(`returns all chunk records when starting from version ${startVersion}`, async function () {
+ const chunkRecords = []
+ for await (const chunk of chunkStore.getProjectChunksFromVersion(
+ projectId,
+ startVersion
+ )) {
+ chunkRecords.push(chunk)
+ }
+ const expectedChunks = [firstChunk, secondChunk, thirdChunk]
+ expect(chunkRecords).to.have.length(expectedChunks.length)
+ chunkRecords.forEach((chunkRecord, index) => {
+ expect(chunkRecord.startVersion).to.deep.equal(
+ expectedChunks[index].getStartVersion()
+ )
+ expect(chunkRecord.endVersion).to.deep.equal(
+ expectedChunks[index].getEndVersion()
+ )
+ })
+ })
+ }
+
+ // The second chunk has startVersion:2 and endVersion:4
+ for (let startVersion = 3; startVersion <= 4; startVersion++) {
+ it(`returns two chunk records when starting from version ${startVersion}`, async function () {
+ const chunkRecords = []
+ for await (const chunk of chunkStore.getProjectChunksFromVersion(
+ projectId,
+ startVersion
+ )) {
+ chunkRecords.push(chunk)
+ }
+ const expectedChunks = [secondChunk, thirdChunk]
+ expect(chunkRecords).to.have.length(expectedChunks.length)
+ chunkRecords.forEach((chunkRecord, index) => {
+ expect(chunkRecord.startVersion).to.deep.equal(
+ expectedChunks[index].getStartVersion()
+ )
+ expect(chunkRecord.endVersion).to.deep.equal(
+ expectedChunks[index].getEndVersion()
+ )
+ })
+ })
+ }
+
+ // The third chunk has startVersion:4 and endVersion:6
+ for (let startVersion = 5; startVersion <= 6; startVersion++) {
+ it(`returns one chunk record when starting from version ${startVersion}`, async function () {
+ const chunkRecords = []
+ for await (const chunk of chunkStore.getProjectChunksFromVersion(
+ projectId,
+ startVersion
+ )) {
+ chunkRecords.push(chunk)
+ }
+ const expectedChunks = [thirdChunk]
+ expect(chunkRecords).to.have.length(expectedChunks.length)
+ chunkRecords.forEach((chunkRecord, index) => {
+ expect(chunkRecord.startVersion).to.deep.equal(
+ expectedChunks[index].getStartVersion()
+ )
+ expect(chunkRecord.endVersion).to.deep.equal(
+ expectedChunks[index].getEndVersion()
+ )
+ })
+ })
+ }
+
+ it('returns no chunk records when starting from a version after the last chunk', async function () {
+ const chunkRecords = []
+ for await (const chunk of chunkStore.getProjectChunksFromVersion(
+ projectId,
+ 7
+ )) {
+ chunkRecords.push(chunk)
+ }
+ expect(chunkRecords).to.have.length(0)
+ })
})
})
@@ -268,15 +724,18 @@ describe('chunkStore', function () {
let chunk = await chunkStore.loadLatest(projectId)
expect(chunk.getEndVersion()).to.equal(oldEndVersion)
+ const blob = await blobStore.putString('')
const changes = [
- makeChange(Operation.addFile(testPathname, File.fromString(''))),
+ makeChange(
+ Operation.addFile(testPathname, File.createLazyFromBlobs(blob))
+ ),
makeChange(Operation.editFile(testPathname, testTextOperation)),
]
chunk.pushChanges(changes)
- await expect(
- chunkStore.update(projectId, oldEndVersion, chunk)
- ).to.be.rejectedWith('S3 Error')
+ await expect(chunkStore.update(projectId, chunk)).to.be.rejectedWith(
+ 'S3 Error'
+ )
chunk = await chunkStore.loadLatest(projectId)
expect(chunk.getEndVersion()).to.equal(oldEndVersion)
})
@@ -285,9 +744,12 @@ describe('chunkStore', function () {
describe('version checks', function () {
beforeEach(async function () {
// Create a chunk with start version 0, end version 3
+ const blob = await blobStore.putString('abc')
const chunk = makeChunk(
[
- makeChange(Operation.addFile('main.tex', File.fromString('abc'))),
+ makeChange(
+ Operation.addFile('main.tex', File.createLazyFromBlobs(blob))
+ ),
makeChange(
Operation.editFile(
'main.tex',
@@ -303,12 +765,17 @@ describe('chunkStore', function () {
],
0
)
- await chunkStore.update(projectId, 0, chunk)
+ await chunkStore.update(projectId, chunk)
})
it('refuses to create a chunk with the same start version', async function () {
+ const blob = await blobStore.putString('abc')
const chunk = makeChunk(
- [makeChange(Operation.addFile('main.tex', File.fromString('abc')))],
+ [
+ makeChange(
+ Operation.addFile('main.tex', File.createLazyFromBlobs(blob))
+ ),
+ ],
0
)
await expect(chunkStore.create(projectId, chunk)).to.be.rejectedWith(
@@ -317,8 +784,13 @@ describe('chunkStore', function () {
})
it("allows creating chunks that don't have version conflicts", async function () {
+ const blob = await blobStore.putString('abc')
const chunk = makeChunk(
- [makeChange(Operation.addFile('main.tex', File.fromString('abc')))],
+ [
+ makeChange(
+ Operation.addFile('main.tex', File.createLazyFromBlobs(blob))
+ ),
+ ],
3
)
await chunkStore.create(projectId, chunk)
diff --git a/services/history-v1/test/acceptance/js/storage/chunk_store_mongo_backend.test.js b/services/history-v1/test/acceptance/js/storage/chunk_store_mongo_backend.test.js
index 61d80810f1..98cdd2db4d 100644
--- a/services/history-v1/test/acceptance/js/storage/chunk_store_mongo_backend.test.js
+++ b/services/history-v1/test/acceptance/js/storage/chunk_store_mongo_backend.test.js
@@ -1,8 +1,16 @@
const { expect } = require('chai')
const { ObjectId } = require('mongodb')
-const { Chunk, Snapshot, History } = require('overleaf-editor-core')
+const {
+ Chunk,
+ Snapshot,
+ History,
+ Change,
+ AddFileOperation,
+ File,
+} = require('overleaf-editor-core')
const cleanup = require('./support/cleanup')
const backend = require('../../../../storage/lib/chunk_store/mongo')
+const { ChunkVersionConflictError } = require('../../../../storage')
describe('chunk store Mongo backend', function () {
beforeEach(cleanup.everything)
@@ -42,11 +50,86 @@ describe('chunk store Mongo backend', function () {
expect(oldChunks).to.deep.equal([])
})
})
+
+ describe('concurrency handling', function () {
+ it('prevents chunks from being created with the same start version', async function () {
+ const projectId = new ObjectId().toString()
+ const chunks = [makeChunk([], 10), makeChunk([], 10)]
+
+ const chunkIds = []
+ for (const chunk of chunks) {
+ const chunkId = await backend.insertPendingChunk(projectId, chunk)
+ chunkIds.push(chunkId)
+ }
+
+ await backend.confirmCreate(projectId, chunks[0], chunkIds[0])
+ await expect(
+ backend.confirmCreate(projectId, chunks[1], chunkIds[1])
+ ).to.be.rejectedWith(ChunkVersionConflictError)
+ })
+
+ describe('conflicts between chunk extension and chunk creation', function () {
+ let projectId,
+ baseChunkId,
+ updatedChunkId,
+ newChunkId,
+ updatedChunk,
+ newChunk
+
+ beforeEach(async function () {
+ projectId = new ObjectId().toString()
+ const baseChunk = makeChunk([], 0)
+ baseChunkId = await backend.insertPendingChunk(projectId, baseChunk)
+ await backend.confirmCreate(projectId, baseChunk, baseChunkId)
+
+ const change = new Change(
+ [new AddFileOperation('main.tex', File.fromString('hello'))],
+ new Date()
+ )
+
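+ // updatedChunk extends the base chunk in place (same start version), while newChunk starts a new chunk at the next version.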
+ updatedChunk = makeChunk([change], 0)
+ updatedChunkId = await backend.insertPendingChunk(
+ projectId,
+ updatedChunk
+ )
+ newChunk = makeChunk([change], 1)
+ newChunkId = await backend.insertPendingChunk(projectId, newChunk)
+ })
+
+ it('prevents creation after extension', async function () {
+ await backend.confirmUpdate(
+ projectId,
+ baseChunkId,
+ updatedChunk,
+ updatedChunkId
+ )
+ await expect(
+ backend.confirmCreate(projectId, newChunk, newChunkId, {
+ oldChunkId: baseChunkId,
+ })
+ ).to.be.rejectedWith(ChunkVersionConflictError)
+ })
+
+ it('prevents extension after creation', async function () {
+ await backend.confirmCreate(projectId, newChunk, newChunkId, {
+ oldChunkId: baseChunkId,
+ })
+ await expect(
+ backend.confirmUpdate(
+ projectId,
+ baseChunkId,
+ updatedChunk,
+ updatedChunkId
+ )
+ ).to.be.rejectedWith(ChunkVersionConflictError)
+ })
+ })
+ })
})
function makeChunk(changes, versionNumber) {
const snapshot = Snapshot.fromRaw({ files: {} })
- const history = new History(snapshot, [])
+ const history = new History(snapshot, changes)
const chunk = new Chunk(history, versionNumber)
return chunk
}
diff --git a/services/history-v1/test/acceptance/js/storage/chunk_store_postgres_backend.test.js b/services/history-v1/test/acceptance/js/storage/chunk_store_postgres_backend.test.js
new file mode 100644
index 0000000000..cd1d705bdc
--- /dev/null
+++ b/services/history-v1/test/acceptance/js/storage/chunk_store_postgres_backend.test.js
@@ -0,0 +1,110 @@
+const { expect } = require('chai')
+const { ObjectId } = require('mongodb')
+const {
+ Chunk,
+ Snapshot,
+ History,
+ Change,
+ AddFileOperation,
+ File,
+} = require('overleaf-editor-core')
+const cleanup = require('./support/cleanup')
+const { ChunkVersionConflictError } = require('../../../../storage')
+const backend = require('../../../../storage/lib/chunk_store/postgres')
+
+describe('chunk store Postgres backend', function () {
+ beforeEach(cleanup.everything)
+
+ it('should reject ObjectId strings as project IDs', async function () {
+ const invalidProjectId = new ObjectId().toString()
+
+ await expect(backend.getLatestChunk(invalidProjectId)).to.be.rejectedWith(
+ 'bad projectId'
+ )
+ await expect(
+ backend.getChunkForVersion(invalidProjectId, 1)
+ ).to.be.rejectedWith('bad projectId')
+ await expect(
+ backend.getChunkForTimestamp(invalidProjectId, new Date())
+ ).to.be.rejectedWith('bad projectId')
+ await expect(
+ backend.getProjectChunkIds(invalidProjectId)
+ ).to.be.rejectedWith('bad projectId')
+ await expect(
+ backend.insertPendingChunk(invalidProjectId, makeChunk([], 0))
+ ).to.be.rejectedWith('bad projectId')
+ await expect(
+ backend.confirmCreate(invalidProjectId, makeChunk([], 0), 1)
+ ).to.be.rejectedWith('bad projectId')
+ await expect(
+ backend.confirmUpdate(invalidProjectId, 1, makeChunk([], 0), 2)
+ ).to.be.rejectedWith('bad projectId')
+ await expect(backend.deleteChunk(invalidProjectId, 1)).to.be.rejectedWith(
+ 'bad projectId'
+ )
+ await expect(
+ backend.deleteProjectChunks(invalidProjectId)
+ ).to.be.rejectedWith('bad projectId')
+ })
+
+ describe('conflicts between chunk extension and chunk creation', function () {
+ let projectId,
+ baseChunkId,
+ updatedChunkId,
+ newChunkId,
+ updatedChunk,
+ newChunk
+
+ beforeEach(async function () {
+ projectId = '1234'
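+ // Use a numeric project id; the postgres backend rejects ObjectId-style ids (see the test above).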
+ const baseChunk = makeChunk([], 0)
+ baseChunkId = await backend.insertPendingChunk(projectId, baseChunk)
+ await backend.confirmCreate(projectId, baseChunk, baseChunkId)
+
+ const change = new Change(
+ [new AddFileOperation('main.tex', File.fromString('hello'))],
+ new Date()
+ )
+
+ updatedChunk = makeChunk([change], 0)
+ updatedChunkId = await backend.insertPendingChunk(projectId, updatedChunk)
+ newChunk = makeChunk([change], 1)
+ newChunkId = await backend.insertPendingChunk(projectId, newChunk)
+ })
+
+ it('prevents creation after extension', async function () {
+ await backend.confirmUpdate(
+ projectId,
+ baseChunkId,
+ updatedChunk,
+ updatedChunkId
+ )
+ await expect(
+ backend.confirmCreate(projectId, newChunk, newChunkId, {
+ oldChunkId: baseChunkId,
+ })
+ ).to.be.rejectedWith(ChunkVersionConflictError)
+ })
+
+ it('prevents extension after creation', async function () {
+ await backend.confirmCreate(projectId, newChunk, newChunkId, {
+ oldChunkId: baseChunkId,
+ })
+ await expect(
+ backend.confirmUpdate(
+ projectId,
+ baseChunkId,
+ updatedChunk,
+ updatedChunkId
+ )
+ ).to.be.rejectedWith(ChunkVersionConflictError)
+ })
+ })
+})
+
+function makeChunk(changes, versionNumber) {
+ const snapshot = Snapshot.fromRaw({ files: {} })
+ const history = new History(snapshot, [])
+ const chunk = new Chunk(history, versionNumber)
+ return chunk
+}
diff --git a/services/history-v1/test/acceptance/js/storage/chunk_store_redis_backend.test.js b/services/history-v1/test/acceptance/js/storage/chunk_store_redis_backend.test.js
new file mode 100644
index 0000000000..fc176de192
--- /dev/null
+++ b/services/history-v1/test/acceptance/js/storage/chunk_store_redis_backend.test.js
@@ -0,0 +1,1315 @@
+'use strict'
+
+const { expect } = require('chai')
+const {
+ Snapshot,
+ Change,
+ AddFileOperation,
+ File,
+} = require('overleaf-editor-core')
+const cleanup = require('./support/cleanup')
+const redisBackend = require('../../../../storage/lib/chunk_store/redis')
+const {
+ JobNotReadyError,
+ JobNotFoundError,
+ VersionOutOfBoundsError,
+} = require('../../../../storage/lib/chunk_store/errors')
+const redis = require('../../../../storage/lib/redis')
+const rclient = redis.rclientHistory
+const keySchema = redisBackend.keySchema
+
+describe('chunk buffer Redis backend', function () {
+ beforeEach(cleanup.everything)
+ const projectId = 'project123'
+
+ describe('getHeadSnapshot', function () {
+ it('should return null on cache miss', async function () {
+ const result = await redisBackend.getHeadSnapshot(projectId)
+ expect(result).to.be.null
+ })
+
+ it('should return the cached head snapshot and version', async function () {
+ // Create a sample snapshot and version
+ const snapshot = new Snapshot()
+ const version = 42
+ const rawSnapshot = JSON.stringify(snapshot.toRaw())
+
+ // Manually set the data in Redis
+ await rclient.set(keySchema.head({ projectId }), rawSnapshot)
+ await rclient.set(
+ keySchema.headVersion({ projectId }),
+ version.toString()
+ )
+
+ // Retrieve the cached snapshot
+ const result = await redisBackend.getHeadSnapshot(projectId)
+
+ expect(result).to.not.be.null
+ expect(result.version).to.equal(version)
+ expect(result.snapshot).to.deep.equal(snapshot) // Use deep equal for object comparison
+ })
+
+ it('should return null if the version is missing', async function () {
+ // Create a sample snapshot
+ const snapshot = new Snapshot()
+ const rawSnapshot = JSON.stringify(snapshot.toRaw())
+
+ // Manually set only the snapshot data in Redis
+ await rclient.set(keySchema.head({ projectId }), rawSnapshot)
+
+ // Attempt to retrieve the snapshot
+ const result = await redisBackend.getHeadSnapshot(projectId)
+
+ expect(result).to.be.null
+ })
+ })
+
+ describe('queueChanges', function () {
+ it('should queue changes when the base version matches head version', async function () {
+ // Create base version
+ const baseVersion = 0
+
+ // Create a new head snapshot that will be set after changes
+ const headSnapshot = new Snapshot()
+
+ // Create changes
+ const timestamp = new Date()
+ const change = new Change([], timestamp, [])
+
+ // Set times
+ const now = Date.now()
+ const persistTime = now + 30 * 1000 // 30 seconds from now
+ const expireTime = now + 60 * 60 * 1000 // 1 hour from now
+
+ // Queue the changes
+ await redisBackend.queueChanges(
+ projectId,
+ headSnapshot,
+ baseVersion,
+ [change],
+ { persistTime, expireTime }
+ )
+
+ // Get the state to verify the changes
+ const state = await redisBackend.getState(projectId)
+
+ // Verify the result
+ expect(state).to.exist
+ expect(state.headVersion).to.equal(baseVersion + 1)
+ expect(state.headSnapshot).to.deep.equal(headSnapshot.toRaw())
+ expect(state.persistTime).to.equal(persistTime)
+ expect(state.expireTime).to.equal(expireTime)
+ })
+
+ it('should throw BaseVersionConflictError when base version does not match head version', async function () {
+ // Create a mismatch scenario
+ const headSnapshot = new Snapshot()
+ const baseVersion = 0
+
+ // Manually set a different head version in Redis
+ await rclient.set(keySchema.headVersion({ projectId }), '5')
+
+ // Create changes
+ const timestamp = new Date()
+ const change = new Change([], timestamp, [])
+
+ // Set times
+ const now = Date.now()
+ const persistTime = now + 30 * 1000
+ const expireTime = now + 60 * 60 * 1000
+
+ // Attempt to queue the changes with a mismatched base version
+ // This should throw a BaseVersionConflictError
+ try {
+ await redisBackend.queueChanges(
+ projectId,
+ headSnapshot,
+ baseVersion,
+ [change],
+ { persistTime, expireTime }
+ )
+ // If we get here, the test should fail
+ expect.fail('Expected BaseVersionConflictError but no error was thrown')
+ } catch (err) {
+ expect(err.name).to.equal('BaseVersionConflictError')
+ expect(err.info).to.deep.include({
+ projectId,
+ baseVersion,
+ })
+ }
+ })
+
+ it('should throw error when given an empty changes array', async function () {
+ // Create a valid scenario but with empty changes
+ const headSnapshot = new Snapshot()
+ const baseVersion = 0
+
+ // Set times
+ const now = Date.now()
+ const persistTime = now + 30 * 1000
+ const expireTime = now + 60 * 60 * 1000
+
+ // Attempt to queue with empty changes array
+ try {
+ await redisBackend.queueChanges(
+ projectId,
+ headSnapshot,
+ baseVersion,
+ [], // Empty changes array
+ { persistTime, expireTime }
+ )
+ // If we get here, the test should fail
+ expect.fail('Expected Error but no error was thrown')
+ } catch (err) {
+ expect(err.message).to.equal('Cannot queue empty changes array')
+ }
+ })
+
+ it('should queue multiple changes and increment version correctly', async function () {
+ // Create base version
+ const baseVersion = 0
+
+ // Create a new head snapshot
+ const headSnapshot = new Snapshot()
+
+ // Create multiple changes
+ const timestamp = new Date()
+ const change1 = new Change([], timestamp)
+ const change2 = new Change([], timestamp)
+ const change3 = new Change([], timestamp)
+
+ // Set times
+ const now = Date.now()
+ const persistTime = now + 30 * 1000
+ const expireTime = now + 60 * 60 * 1000
+
+ // Queue the changes
+ await redisBackend.queueChanges(
+ projectId,
+ headSnapshot,
+ baseVersion,
+ [change1, change2, change3], // Multiple changes
+ { persistTime, expireTime }
+ )
+
+ // Get the state to verify the changes
+ const state = await redisBackend.getState(projectId)
+
+ // Verify that version was incremented by the number of changes
+ expect(state.headVersion).to.equal(baseVersion + 3)
+ expect(state.headSnapshot).to.deep.equal(headSnapshot.toRaw())
+ })
+
+ it('should use the provided persistTime only if it is sooner than existing time', async function () {
+ // Create base version
+ const baseVersion = 0
+
+ // Create a new head snapshot
+ const headSnapshot = new Snapshot()
+
+ // Create changes
+ const timestamp = new Date()
+ const change = new Change([], timestamp)
+
+ // Set times
+ const now = Date.now()
+ const earlierPersistTime = now + 15 * 1000 // 15 seconds from now
+ const laterPersistTime = now + 30 * 1000 // 30 seconds from now
+ const expireTime = now + 60 * 60 * 1000 // 1 hour from now
+
+ // First queue changes with the later persist time
+ await redisBackend.queueChanges(
+ projectId,
+ headSnapshot,
+ baseVersion,
+ [change],
+ { persistTime: laterPersistTime, expireTime }
+ )
+
+ // Get the state to verify the first persist time was set
+ let state = await redisBackend.getState(projectId)
+ expect(state.persistTime).to.equal(laterPersistTime)
+
+ // Queue more changes with an earlier persist time
+ const newerHeadSnapshot = new Snapshot()
+ await redisBackend.queueChanges(
+ projectId,
+ newerHeadSnapshot,
+ baseVersion + 1, // Updated base version
+ [change],
+ {
+ persistTime: earlierPersistTime, // Earlier time should replace the later one
+ expireTime,
+ }
+ )
+
+ // Get the state to verify the persist time was updated to the earlier time
+ state = await redisBackend.getState(projectId)
+ expect(state.persistTime).to.equal(earlierPersistTime)
+
+ // Queue more changes with another later persist time
+ const evenNewerHeadSnapshot = new Snapshot()
+ await redisBackend.queueChanges(
+ projectId,
+ evenNewerHeadSnapshot,
+ baseVersion + 2, // Updated base version
+ [change],
+ {
+ persistTime: laterPersistTime, // Later time should not replace the earlier one
+ expireTime,
+ }
+ )
+
+ // Get the state to verify the persist time remains at the earlier time
+ state = await redisBackend.getState(projectId)
+ expect(state.persistTime).to.equal(earlierPersistTime) // Should still be the earlier time
+ })
+
+ it('should ignore changes when onlyIfExists is true and project does not exist', async function () {
+ // Create base version
+ const baseVersion = 10
+
+ // Create a new head snapshot
+ const headSnapshot = new Snapshot()
+
+ // Create changes
+ const timestamp = new Date()
+ const change = new Change([], timestamp)
+
+ // Set times
+ const now = Date.now()
+ const persistTime = now + 30 * 1000
+ const expireTime = now + 60 * 60 * 1000
+
+ // Queue changes with onlyIfExists set to true
+ const result = await redisBackend.queueChanges(
+ projectId,
+ headSnapshot,
+ baseVersion,
+ [change],
+ { persistTime, expireTime, onlyIfExists: true }
+ )
+
+ // Should return 'ignore' status
+ expect(result).to.equal('ignore')
+
+ // Get the state - should be empty/null
+ const state = await redisBackend.getState(projectId)
+ expect(state.headVersion).to.be.null
+ expect(state.headSnapshot).to.be.null
+ })
+
+ it('should queue changes when onlyIfExists is true and project exists', async function () {
+ // First create the project
+ const headSnapshot = new Snapshot()
+ const baseVersion = 10
+ const timestamp = new Date()
+ const change1 = new Change([], timestamp)
+
+ // Set times
+ const now = Date.now()
+ const persistTime = now + 30 * 1000
+ const expireTime = now + 60 * 60 * 1000
+
+ // Create the project first
+ await redisBackend.queueChanges(
+ projectId,
+ headSnapshot,
+ baseVersion,
+ [change1],
+ { persistTime, expireTime }
+ )
+
+ // Now create another change with onlyIfExists set to true
+ const newerSnapshot = new Snapshot()
+ const change2 = new Change([], timestamp)
+
+ // Queue changes with onlyIfExists set to true
+ const result = await redisBackend.queueChanges(
+ projectId,
+ newerSnapshot,
+ baseVersion + 1, // Head version is baseVersion + 1 after the first change
+ [change2],
+ { persistTime, expireTime, onlyIfExists: true }
+ )
+
+ // Should return 'ok' status
+ expect(result).to.equal('ok')
+
+ // Get the state to verify the changes were applied
+ const state = await redisBackend.getState(projectId)
+ expect(state.headVersion).to.equal(baseVersion + 2) // Head is baseVersion + 2 after both changes
+ expect(state.headSnapshot).to.deep.equal(newerSnapshot.toRaw())
+ })
+
+ it('should queue changes when onlyIfExists is false and project does not exist', async function () {
+ // Create base version
+ const baseVersion = 10
+
+ // Create a new head snapshot
+ const headSnapshot = new Snapshot()
+
+ // Create changes
+ const timestamp = new Date()
+ const change = new Change([], timestamp)
+
+ // Set times
+ const now = Date.now()
+ const persistTime = now + 30 * 1000
+ const expireTime = now + 60 * 60 * 1000
+
+ // Queue changes with onlyIfExists explicitly set to false
+ const result = await redisBackend.queueChanges(
+ projectId,
+ headSnapshot,
+ baseVersion,
+ [change],
+ { persistTime, expireTime, onlyIfExists: false }
+ )
+
+ // Should return 'ok' status
+ expect(result).to.equal('ok')
+
+ // Get the state to verify the project was created
+ const state = await redisBackend.getState(projectId)
+ expect(state.headVersion).to.equal(baseVersion + 1)
+ expect(state.headSnapshot).to.deep.equal(headSnapshot.toRaw())
+ })
+ })
+
+ describe('getChangesSinceVersion', function () {
+ it('should return not_found when project does not exist', async function () {
+ const result = await redisBackend.getChangesSinceVersion(projectId, 1)
+ expect(result.status).to.equal('not_found')
+ })
+
+ it('should return empty array when requested version equals head version', async function () {
+ // Set head version
+ const headVersion = 5
+ await rclient.set(
+ keySchema.headVersion({ projectId }),
+ headVersion.toString()
+ )
+
+ // Request changes since the current head version
+ const result = await redisBackend.getChangesSinceVersion(
+ projectId,
+ headVersion
+ )
+
+ expect(result.status).to.equal('ok')
+ expect(result.changes).to.be.an('array').that.is.empty
+ })
+
+ it('should return out_of_bounds when requested version is greater than head version', async function () {
+ // Set head version
+ const headVersion = 5
+ await rclient.set(
+ keySchema.headVersion({ projectId }),
+ headVersion.toString()
+ )
+
+ // Request changes with version larger than head
+ const result = await redisBackend.getChangesSinceVersion(
+ projectId,
+ headVersion + 1
+ )
+
+ expect(result.status).to.equal('out_of_bounds')
+ })
+
+ it('should return out_of_bounds when requested version is too old', async function () {
+ // Set head version
+ const headVersion = 10
+ await rclient.set(
+ keySchema.headVersion({ projectId }),
+ headVersion.toString()
+ )
+
+ // Create a few changes but less than what we'd need to reach requested version
+ const timestamp = new Date()
+ const change1 = new Change([], timestamp)
+ const change2 = new Change([], timestamp)
+ await rclient.rpush(
+ keySchema.changes({ projectId }),
+ JSON.stringify(change1.toRaw()),
+ JSON.stringify(change2.toRaw())
+ )
+
+ // Request changes from version 5, which is too old (headVersion - changesCount = 10 - 2 = 8)
+ const result = await redisBackend.getChangesSinceVersion(projectId, 5)
+
+ expect(result.status).to.equal('out_of_bounds')
+ })
+
+ it('should return changes since requested version', async function () {
+ // Set head version
+ const headVersion = 5
+ await rclient.set(
+ keySchema.headVersion({ projectId }),
+ headVersion.toString()
+ )
+
+ // Create changes
+ const timestamp = new Date()
+ const change1 = new Change([], timestamp)
+ const change2 = new Change([], timestamp)
+ const change3 = new Change([], timestamp)
+
+ // Push changes to Redis (representing versions 3, 4, and 5)
+ await rclient.rpush(
+ keySchema.changes({ projectId }),
+ JSON.stringify(change1.toRaw()),
+ JSON.stringify(change2.toRaw()),
+ JSON.stringify(change3.toRaw())
+ )
+
+ // Request changes since version 3 (should return changes for versions 4 and 5)
+ const result = await redisBackend.getChangesSinceVersion(projectId, 3)
+
+ expect(result.status).to.equal('ok')
+ expect(result.changes).to.be.an('array').with.lengthOf(2)
+
+ // The changes array should contain the raw changes
+ // Note: We're comparing raw objects, not the Change instances
+ expect(result.changes[0]).to.deep.equal(change2.toRaw())
+ expect(result.changes[1]).to.deep.equal(change3.toRaw())
+ })
+
+ it('should return all changes when requested version is earliest available', async function () {
+ // Set head version to 5
+ const headVersion = 5
+ await rclient.set(
+ keySchema.headVersion({ projectId }),
+ headVersion.toString()
+ )
+
+ // Create changes
+ const timestamp = new Date()
+ const change1 = new Change([], timestamp)
+ const change2 = new Change([], timestamp)
+ const change3 = new Change([], timestamp)
+
+ // Push changes to Redis (representing versions 3, 4, and 5)
+ await rclient.rpush(
+ keySchema.changes({ projectId }),
+ JSON.stringify(change1.toRaw()),
+ JSON.stringify(change2.toRaw()),
+ JSON.stringify(change3.toRaw())
+ )
+
+ // Request changes since version 2 (should return all 3 changes)
+ const result = await redisBackend.getChangesSinceVersion(projectId, 2)
+
+ expect(result.status).to.equal('ok')
+ expect(result.changes).to.be.an('array').with.lengthOf(3)
+ expect(result.changes[0]).to.deep.equal(change1.toRaw())
+ expect(result.changes[1]).to.deep.equal(change2.toRaw())
+ expect(result.changes[2]).to.deep.equal(change3.toRaw())
+ })
+ })
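+
+ // For reference, the statuses asserted above suggest a caller shaped roughly
+ // like this sketch (illustrative only; applyChanges and reloadProject are
+ // hypothetical helpers, not part of this module):
+ //
+ //   const result = await redisBackend.getChangesSinceVersion(projectId, since)
+ //   if (result.status === 'ok') applyChanges(result.changes)
+ //   else reloadProject() // 'not_found' or 'out_of_bounds': fall back to the chunk store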
+
+ describe('getNonPersistedChanges', function () {
+ describe('project not loaded', function () {
+ it('should return empty array', async function () {
+ const changes = await redisBackend.getNonPersistedChanges(projectId, 0)
+ expect(changes).to.be.an('array').that.is.empty
+ })
+
+ it('should handle any base version', async function () {
+ const changes = await redisBackend.getNonPersistedChanges(projectId, 2)
+ expect(changes).to.be.an('array').that.is.empty
+ })
+ })
+
+ describe('project never persisted', function () {
+ let changes
+
+ beforeEach(async function () {
+ changes = await setupState(projectId, {
+ headVersion: 5,
+ persistedVersion: null,
+ changes: 3,
+ })
+ })
+
+ it('should return all changes if requested', async function () {
+ const nonPersistedChanges = await redisBackend.getNonPersistedChanges(
+ projectId,
+ 2
+ )
+ expect(nonPersistedChanges).to.deep.equal(changes)
+ })
+
+ it('should return part of the changes following a given base version if requested', async function () {
+ const nonPersistedChanges = await redisBackend.getNonPersistedChanges(
+ projectId,
+ 3
+ )
+ expect(nonPersistedChanges).to.deep.equal(changes.slice(1))
+ })
+
+ it('should limit the number of changes returned if requested', async function () {
+ const nonPersistedChanges = await redisBackend.getNonPersistedChanges(
+ projectId,
+ 2,
+ { maxChanges: 2 }
+ )
+ expect(nonPersistedChanges).to.deep.equal(changes.slice(0, 2))
+ })
+
+ it('should return all changes if limit is not reached', async function () {
+ const nonPersistedChanges = await redisBackend.getNonPersistedChanges(
+ projectId,
+ 3,
+ { maxChanges: 10 }
+ )
+ expect(nonPersistedChanges).to.deep.equal(changes.slice(1))
+ })
+
+ it('should error if the base version requested is too low', async function () {
+ await expect(
+ redisBackend.getNonPersistedChanges(projectId, 0)
+ ).to.be.rejectedWith(VersionOutOfBoundsError)
+ })
+
+ it('should return an empty array if the base version is the head version', async function () {
+ const nonPersistedChanges = await redisBackend.getNonPersistedChanges(
+ projectId,
+ 5
+ )
+ expect(nonPersistedChanges).to.deep.equal([])
+ })
+
+ it('should error if the base version requested is too high', async function () {
+ await expect(
+ redisBackend.getNonPersistedChanges(projectId, 6)
+ ).to.be.rejectedWith(VersionOutOfBoundsError)
+ })
+ })
+
+ describe('fully persisted changes', function () {
+ beforeEach(async function () {
+ await setupState(projectId, {
+ headVersion: 5,
+ persistedVersion: 5,
+ changes: 3,
+ })
+ })
+
+ it('should return an empty array when asked for the head version', async function () {
+ const nonPersistedChanges = await redisBackend.getNonPersistedChanges(
+ projectId,
+ 5
+ )
+ expect(nonPersistedChanges).to.deep.equal([])
+ })
+
+ it('should throw an error when asked for an older version', async function () {
+ await expect(
+ redisBackend.getNonPersistedChanges(projectId, 4)
+ ).to.be.rejectedWith(VersionOutOfBoundsError)
+ })
+
+ it('should throw an error when asked for a newer version', async function () {
+ await expect(
+ redisBackend.getNonPersistedChanges(projectId, 6)
+ ).to.be.rejectedWith(VersionOutOfBoundsError)
+ })
+ })
+
+ describe('partially persisted project', function () {
+ let changes
+
+ beforeEach(async function () {
+ changes = await setupState(projectId, {
+ headVersion: 10,
+ persistedVersion: 7,
+ changes: 6,
+ })
+ })
+
+ it('should return all non-persisted changes if requested', async function () {
+ const nonPersistedChanges = await redisBackend.getNonPersistedChanges(
+ projectId,
+ 7
+ )
+ expect(nonPersistedChanges).to.deep.equal(changes.slice(3))
+ })
+
+ it('should return part of the changes if requested', async function () {
+ const nonPersistedChanges = await redisBackend.getNonPersistedChanges(
+ projectId,
+ 8
+ )
+ expect(nonPersistedChanges).to.deep.equal(changes.slice(4))
+ })
+
+ it('should error if the base version requested is too low', async function () {
+ await expect(
+ redisBackend.getNonPersistedChanges(projectId, 5)
+ ).to.be.rejectedWith(VersionOutOfBoundsError)
+ })
+
+ it('should return an empty array if the base version is the head version', async function () {
+ const nonPersistedChanges = await redisBackend.getNonPersistedChanges(
+ projectId,
+ 10
+ )
+ expect(nonPersistedChanges).to.deep.equal([])
+ })
+
+ it('should error if the base version requested is too high', async function () {
+ await expect(
+ redisBackend.getNonPersistedChanges(projectId, 12)
+ ).to.be.rejectedWith(VersionOutOfBoundsError)
+ })
+ })
+
+ // This case should never happen, but we'll handle it anyway
+ describe('persisted version before start of changes list', function () {
+ let changes
+
+ beforeEach(async function () {
+ changes = await setupState(projectId, {
+ headVersion: 5,
+ persistedVersion: 1,
+ changes: 3,
+ })
+ })
+
+ it('should return all non-persisted changes if requested', async function () {
+ const nonPersistedChanges = await redisBackend.getNonPersistedChanges(
+ projectId,
+ 2
+ )
+ expect(nonPersistedChanges).to.deep.equal(changes)
+ })
+
+ it('should return part of the changes if requested', async function () {
+ const nonPersistedChanges = await redisBackend.getNonPersistedChanges(
+ projectId,
+ 3
+ )
+ expect(nonPersistedChanges).to.deep.equal(changes.slice(1))
+ })
+
+ it('should error if the base version requested is too low', async function () {
+ await expect(
+ redisBackend.getNonPersistedChanges(projectId, 1)
+ ).to.be.rejectedWith(VersionOutOfBoundsError)
+ })
+
+ it('should return an empty array if the base version is the head version', async function () {
+ const nonPersistedChanges = await redisBackend.getNonPersistedChanges(
+ projectId,
+ 5
+ )
+ expect(nonPersistedChanges).to.deep.equal([])
+ })
+
+ it('should error if the base version requested is too high', async function () {
+ await expect(
+ redisBackend.getNonPersistedChanges(projectId, 6)
+ ).to.be.rejectedWith(VersionOutOfBoundsError)
+ })
+ })
+ })
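+
+ // Taken together, the cases above pin down the valid base-version window for
+ // getNonPersistedChanges: max(persistedVersion, headVersion - changes.length)
+ // <= baseVersion <= headVersion. Anything outside that window rejects with
+ // VersionOutOfBoundsError, and baseVersion === headVersion yields [].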
+
+ describe('setPersistedVersion', function () {
+ const persistTime = Date.now() + 60 * 1000 // 1 minute from now
+
+ it('should return not_found when project does not exist', async function () {
+ const result = await redisBackend.setPersistedVersion(projectId, 5)
+ expect(result).to.equal('not_found')
+ })
+
+ describe('when the persisted version is not set', function () {
+ beforeEach(async function () {
+ await setupState(projectId, {
+ headVersion: 5,
+ persistedVersion: null,
+ persistTime,
+ changes: 5,
+ })
+ })
+
+ it('should set the persisted version', async function () {
+ const status = await redisBackend.setPersistedVersion(projectId, 3)
+ expect(status).to.equal('ok')
+ const state = await redisBackend.getState(projectId)
+ expect(state.persistedVersion).to.equal(3)
+ })
+
+ it('should leave the persist time if the persisted version is not current', async function () {
+ const status = await redisBackend.setPersistedVersion(projectId, 3)
+ expect(status).to.equal('ok')
+ const state = await redisBackend.getState(projectId)
+ expect(state.persistTime).to.deep.equal(persistTime) // Persist time should remain unchanged
+ })
+
+ it('should refuse to set a persisted version greater than the head version', async function () {
+ await expect(
+ redisBackend.setPersistedVersion(projectId, 10)
+ ).to.be.rejectedWith(VersionOutOfBoundsError)
+ // Ensure persisted version remains unchanged
+ const state = await redisBackend.getState(projectId)
+ expect(state.persistedVersion).to.be.null
+ })
+
+ it('should clear the persist time when the persisted version is current', async function () {
+ const status = await redisBackend.setPersistedVersion(projectId, 5)
+ expect(status).to.equal('ok')
+ const state = await redisBackend.getState(projectId)
+ expect(state.persistedVersion).to.equal(5)
+ expect(state.persistTime).to.be.null // Persist time should be cleared
+ })
+ })
+
+ describe('when the persisted version is set', function () {
+ beforeEach(async function () {
+ await setupState(projectId, {
+ headVersion: 5,
+ persistedVersion: 3,
+ persistTime,
+ changes: 5,
+ })
+ })
+
+ it('should set the persisted version', async function () {
+ const status = await redisBackend.setPersistedVersion(projectId, 5)
+ expect(status).to.equal('ok')
+ const state = await redisBackend.getState(projectId)
+ expect(state.persistedVersion).to.equal(5)
+ })
+
+ it('should clear the persist time when the persisted version is current', async function () {
+ const status = await redisBackend.setPersistedVersion(projectId, 5)
+ expect(status).to.equal('ok')
+ const state = await redisBackend.getState(projectId)
+ expect(state.persistedVersion).to.equal(5)
+ expect(state.persistTime).to.be.null // Persist time should be cleared
+ })
+
+ it('should leave the persist time if the persisted version is not current', async function () {
+ const status = await redisBackend.setPersistedVersion(projectId, 4)
+ expect(status).to.equal('ok')
+ const state = await redisBackend.getState(projectId)
+ expect(state.persistedVersion).to.equal(4)
+ expect(state.persistTime).to.deep.equal(persistTime) // Persist time should remain unchanged
+ })
+
+ it('should not decrease the persisted version', async function () {
+ const status = await redisBackend.setPersistedVersion(projectId, 2)
+ expect(status).to.equal('too_low')
+ const state = await redisBackend.getState(projectId)
+ expect(state.persistedVersion).to.equal(3)
+ })
+
+ it('should refuse to set a persisted version greater than the head version', async function () {
+ await expect(
+ redisBackend.setPersistedVersion(projectId, 10)
+ ).to.be.rejectedWith(VersionOutOfBoundsError)
+ // Ensure persisted version remains unchanged
+ const state = await redisBackend.getState(projectId)
+ expect(state.persistedVersion).to.equal(3)
+ })
+ })
+
+ it('should trim the changes list to keep only MAX_PERSISTED_CHANGES beyond persisted version', async function () {
+ // Get MAX_PERSISTED_CHANGES to ensure our test data is larger
+ const maxPersistedChanges = redisBackend.MAX_PERSISTED_CHANGES
+
+ // Create a larger number of changes for the test
+ // Using MAX_PERSISTED_CHANGES + 10 to ensure we have enough changes to trigger trimming
+ const totalChanges = maxPersistedChanges + 10
+
+ // Set head version to match total number of changes
+ const headVersion = totalChanges
+ await rclient.set(
+ keySchema.headVersion({ projectId }),
+ headVersion.toString()
+ )
+
+ // Create changes for versions 1 through totalChanges
+ const timestamp = new Date()
+ const changes = Array.from(
+ { length: totalChanges },
+ (_, idx) =>
+ new Change(
+ [new AddFileOperation(`file${idx}.tex`, File.fromString('hello'))],
+ timestamp
+ )
+ )
+
+ // Push changes to Redis
+ await rclient.rpush(
+ keySchema.changes({ projectId }),
+ ...changes.map(change => JSON.stringify(change.toRaw()))
+ )
+
+ // Set persisted version to somewhere near the head version
+ const persistedVersion = headVersion - 5
+
+ // Set the persisted version
+ const result = await redisBackend.setPersistedVersion(
+ projectId,
+ persistedVersion
+ )
+ expect(result).to.equal('ok')
+
+ // Get all changes that remain in Redis
+ const remainingChanges = await rclient.lrange(
+ keySchema.changes({ projectId }),
+ 0,
+ -1
+ )
+
+ // Calculate the expected number of changes to remain
+ expect(remainingChanges).to.have.lengthOf(
+ maxPersistedChanges + (headVersion - persistedVersion)
+ )
+
+ // Check that remaining changes are the expected ones
+ const expectedChanges = changes.slice(
+ persistedVersion - maxPersistedChanges,
+ totalChanges
+ )
+ expect(remainingChanges).to.deep.equal(
+ expectedChanges.map(change => JSON.stringify(change.toRaw()))
+ )
+ })
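+
+ // In other words, the trim asserted above keeps at most MAX_PERSISTED_CHANGES
+ // already-persisted changes plus every change after persistedVersion, i.e.
+ // remaining = MAX_PERSISTED_CHANGES + (headVersion - persistedVersion).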
+
+ it('should keep all changes when there are fewer than MAX_PERSISTED_CHANGES', async function () {
+ // Set head version to 5
+ const headVersion = 5
+ await rclient.set(
+ keySchema.headVersion({ projectId }),
+ headVersion.toString()
+ )
+
+ // Create changes for versions 1 through 5
+ const timestamp = new Date()
+ const changes = Array.from({ length: 5 }, () => new Change([], timestamp))
+
+ // Push changes to Redis
+ await rclient.rpush(
+ keySchema.changes({ projectId }),
+ ...changes.map(change => JSON.stringify(change.toRaw()))
+ )
+
+ // Set persisted version to 3
+ // All changes should remain since total count is small
+ const persistedVersion = 3
+
+ // Ensure MAX_PERSISTED_CHANGES is larger than our test dataset
+ expect(redisBackend.MAX_PERSISTED_CHANGES).to.be.greaterThan(
+ 5,
+ 'MAX_PERSISTED_CHANGES should be greater than 5 for this test'
+ )
+
+ // Set the persisted version
+ const result = await redisBackend.setPersistedVersion(
+ projectId,
+ persistedVersion
+ )
+ expect(result).to.equal('ok')
+
+ // Get all changes that remain in Redis
+ const remainingChanges = await rclient.lrange(
+ keySchema.changes({ projectId }),
+ 0,
+ -1
+ )
+
+ // All changes should remain
+ expect(remainingChanges).to.have.lengthOf(5)
+ })
+ })
+
+ describe('getState', function () {
+ it('should return complete project state from Redis', async function () {
+ // Set up the test data in Redis
+ const snapshot = new Snapshot()
+ const rawSnapshot = JSON.stringify(snapshot.toRaw())
+ const headVersion = 42
+ const persistedVersion = 40
+ const now = Date.now()
+ const expireTime = now + 60 * 60 * 1000 // 1 hour from now
+ const persistTime = now + 30 * 1000 // 30 seconds from now
+
+ // Create a change
+ const timestamp = new Date()
+ const change = new Change([], timestamp)
+ const serializedChange = JSON.stringify(change.toRaw())
+
+ // Set everything in Redis
+ await rclient.set(keySchema.head({ projectId }), rawSnapshot)
+ await rclient.set(
+ keySchema.headVersion({ projectId }),
+ headVersion.toString()
+ )
+ await rclient.set(
+ keySchema.persistedVersion({ projectId }),
+ persistedVersion.toString()
+ )
+ await rclient.set(
+ keySchema.expireTime({ projectId }),
+ expireTime.toString()
+ )
+ await rclient.set(
+ keySchema.persistTime({ projectId }),
+ persistTime.toString()
+ )
+ await rclient.rpush(keySchema.changes({ projectId }), serializedChange)
+
+ // Get the state
+ const state = await redisBackend.getState(projectId)
+
+ // Verify everything matches
+ expect(state).to.exist
+ expect(state.headSnapshot).to.deep.equal(snapshot.toRaw())
+ expect(state.headVersion).to.equal(headVersion)
+ expect(state.persistedVersion).to.equal(persistedVersion)
+ expect(state.expireTime).to.equal(expireTime)
+ expect(state.persistTime).to.equal(persistTime)
+ })
+
+ it('should return proper defaults for missing fields', async function () {
+ // Only set the head snapshot and version, leave others unset
+ const snapshot = new Snapshot()
+ const rawSnapshot = JSON.stringify(snapshot.toRaw())
+ const headVersion = 42
+
+ await rclient.set(keySchema.head({ projectId }), rawSnapshot)
+ await rclient.set(
+ keySchema.headVersion({ projectId }),
+ headVersion.toString()
+ )
+
+ // Get the state
+ const state = await redisBackend.getState(projectId)
+
+ // Verify that the fields we set are returned and the others fall back to their defaults
+ expect(state).to.exist
+ expect(state.headSnapshot).to.deep.equal(snapshot.toRaw())
+ expect(state.headVersion).to.equal(headVersion)
+ expect(state.persistedVersion).to.be.null
+ expect(state.expireTime).to.be.null
+ expect(state.persistTime).to.be.null
+ })
+ })
+
+ describe('setExpireTime', function () {
+ it('should set the expire time on an active project', async function () {
+ // Load a fake project in Redis
+ const change = makeChange()
+ await queueChanges(projectId, [change], { expireTime: 123 })
+
+ // Check that the right expire time was recorded
+ let state = await redisBackend.getState(projectId)
+ expect(state.expireTime).to.equal(123)
+
+ // Set the expire time to something else
+ await redisBackend.setExpireTime(projectId, 456)
+ state = await redisBackend.getState(projectId)
+ expect(state.expireTime).to.equal(456)
+ })
+
+ it('should not set an expire time on an inactive project', async function () {
+ let state = await redisBackend.getState(projectId)
+ expect(state.expireTime).to.be.null
+
+ await redisBackend.setExpireTime(projectId, 456)
+ state = await redisBackend.getState(projectId)
+ expect(state.expireTime).to.be.null
+ })
+ })
+
+ describe('expireProject', function () {
+ it('should expire a persisted project', async function () {
+ // Load and persist a project in Redis
+ const change = makeChange()
+ await queueChanges(projectId, [change])
+ await redisBackend.setPersistedVersion(projectId, 1)
+
+ // Check that the project is loaded
+ let state = await redisBackend.getState(projectId)
+ expect(state.headVersion).to.equal(1)
+ expect(state.persistedVersion).to.equal(1)
+
+ // Expire the project
+ await redisBackend.expireProject(projectId)
+ state = await redisBackend.getState(projectId)
+ expect(state.headVersion).to.be.null
+ })
+
+ it('should not expire a non-persisted project', async function () {
+ // Load a project in Redis
+ const change = makeChange()
+ await queueChanges(projectId, [change])
+
+ // Check that the project is loaded
+ let state = await redisBackend.getState(projectId)
+ expect(state.headVersion).to.equal(1)
+ expect(state.persistedVersion).to.equal(null)
+
+ // Expire the project
+ await redisBackend.expireProject(projectId)
+ state = await redisBackend.getState(projectId)
+ expect(state.headVersion).to.equal(1)
+ })
+
+ it('should not expire a partially persisted project', async function () {
+ // Load a fake project in Redis
+ const change1 = makeChange()
+ const change2 = makeChange()
+ await queueChanges(projectId, [change1, change2])
+
+ // Persist the first change
+ await redisBackend.setPersistedVersion(projectId, 1)
+
+ // Check that the project is loaded
+ let state = await redisBackend.getState(projectId)
+ expect(state.headVersion).to.equal(2)
+ expect(state.persistedVersion).to.equal(1)
+
+ // Expire the project
+ await redisBackend.expireProject(projectId)
+ state = await redisBackend.getState(projectId)
+ expect(state.headVersion).to.equal(2)
+ })
+
+ it('should handle a project that is not loaded', async function () {
+ // Check that the project is not loaded
+ let state = await redisBackend.getState(projectId)
+ expect(state.headVersion).to.be.null
+
+ // Expire the project
+ await redisBackend.expireProject(projectId)
+ state = await redisBackend.getState(projectId)
+ expect(state.headVersion).to.be.null
+ })
+ })
+
+ describe('claimExpireJob', function () {
+ it("should claim the expire job when it's ready", async function () {
+ // Load a project in Redis
+ const change = makeChange()
+ const now = Date.now()
+ const expireTime = now - 1000
+ await queueChanges(projectId, [change], { expireTime })
+
+ // Check that the expire time has been set correctly
+ let state = await redisBackend.getState(projectId)
+ expect(state.expireTime).to.equal(expireTime)
+
+ // Claim the job
+ await redisBackend.claimExpireJob(projectId)
+
+ // Check the job expires in the future
+ state = await redisBackend.getState(projectId)
+ expect(state.expireTime).to.satisfy(time => time > now)
+ })
+
+ it('should throw an error when the job is not ready', async function () {
+ // Load a project in Redis
+ const change = makeChange()
+ const now = Date.now()
+ const expireTime = now + 100_000
+ await queueChanges(projectId, [change], { expireTime })
+
+ // Claim the job
+ await expect(redisBackend.claimExpireJob(projectId)).to.be.rejectedWith(
+ JobNotReadyError
+ )
+ })
+
+ it('should throw an error when the job is not found', async function () {
+ // Claim a job on a project that is not loaded
+ await expect(redisBackend.claimExpireJob(projectId)).to.be.rejectedWith(
+ JobNotFoundError
+ )
+ })
+ })
+
+ describe('claimPersistJob', function () {
+ it("should claim the persist job when it's ready", async function () {
+ // Load a project in Redis
+ const change = makeChange()
+ const now = Date.now()
+ const persistTime = now - 1000
+ await queueChanges(projectId, [change], { persistTime })
+
+ // Check that the persist time has been set correctly
+ let state = await redisBackend.getState(projectId)
+ expect(state.persistTime).to.equal(persistTime)
+
+ // Claim the job
+ await redisBackend.claimPersistJob(projectId)
+
+ // Check that the persist time has been pushed into the future
+ state = await redisBackend.getState(projectId)
+ expect(state.persistTime).to.satisfy(time => time > now)
+ })
+
+ it('should throw an error when the job is not ready', async function () {
+ // Load a project in Redis
+ const change = makeChange()
+ const now = Date.now()
+ const persistTime = now + 100_000
+ await queueChanges(projectId, [change], { persistTime })
+
+ // Claim the job
+ await expect(redisBackend.claimPersistJob(projectId)).to.be.rejectedWith(
+ JobNotReadyError
+ )
+ })
+
+ it('should throw an error when the job is not found', async function () {
+ // Claim a job on a project that is not loaded
+ await expect(redisBackend.claimPersistJob(projectId)).to.be.rejectedWith(
+ JobNotFoundError
+ )
+ })
+ })
+
+ describe('closing a job', function () {
+ let job
+
+ beforeEach(async function () {
+ // Load a project in Redis
+ const change = makeChange()
+ const now = Date.now()
+ const expireTime = now - 1000
+ await queueChanges(projectId, [change], { expireTime })
+
+ // Check that the expire time has been set correctly
+ const state = await redisBackend.getState(projectId)
+ expect(state.expireTime).to.equal(expireTime)
+
+ // Claim the job
+ job = await redisBackend.claimExpireJob(projectId)
+ })
+
+ it("should delete the key if it hasn't changed", async function () {
+ await job.close()
+ const state = await redisBackend.getState(projectId)
+ expect(state.expireTime).to.be.null
+ })
+
+ it('should keep the key if it has changed', async function () {
+ const newTimestamp = job.claimTimestamp + 1000
+ await redisBackend.setExpireTime(projectId, newTimestamp)
+ await job.close()
+ const state = await redisBackend.getState(projectId)
+ expect(state.expireTime).to.equal(newTimestamp)
+ })
+ })
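+
+ // The claim/close behaviour exercised above implies a worker loop roughly like
+ // this sketch (doWork is a hypothetical placeholder, not part of the backend):
+ //
+ //   try {
+ //     const job = await redisBackend.claimExpireJob(projectId)
+ //     try { await doWork(projectId) } finally { await job.close() }
+ //   } catch (err) {
+ //     if (!(err instanceof JobNotReadyError || err instanceof JobNotFoundError)) throw err
+ //   }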
+
+ describe('hardDeleteProject', function () {
+ it('should delete all keys associated with the project', async function () {
+ // Setup project state
+ await setupState(projectId, {
+ headVersion: 5,
+ headSnapshot: new Snapshot(),
+ persistedVersion: 3,
+ persistTime: Date.now(),
+ expireTime: Date.now() + 3600 * 1000, // 1 hour from now
+ changes: 5,
+ })
+
+ // Verify that state exists before deletion
+ let state = await redisBackend.getState(projectId)
+ expect(state.headVersion).to.equal(5)
+
+ // Call hardDeleteProject
+ const result = await redisBackend.hardDeleteProject(projectId)
+ expect(result).to.equal('ok')
+
+ // Verify that all keys are deleted
+ state = await redisBackend.getState(projectId)
+ expect(state.headVersion).to.be.null
+ expect(state.headSnapshot).to.be.null
+ expect(state.persistedVersion).to.be.null
+ expect(state.persistTime).to.be.null
+ expect(state.expireTime).to.be.null
+ expect(state.changes).to.be.an('array').that.is.empty
+ })
+
+ it('should not throw an error if the project does not exist', async function () {
+ // Call hardDeleteProject on a non-existent project
+ const result = await redisBackend.hardDeleteProject(projectId)
+ expect(result).to.equal('ok')
+ })
+ })
+})
+
+async function queueChanges(projectId, changes, opts = {}) {
+ const baseVersion = 0
+ const headSnapshot = new Snapshot()
+
+ await redisBackend.queueChanges(
+ projectId,
+ headSnapshot,
+ baseVersion,
+ changes,
+ {
+ persistTime: opts.persistTime,
+ expireTime: opts.expireTime,
+ }
+ )
+}
+
+function makeChange() {
+ const timestamp = new Date()
+ return new Change([], timestamp)
+}
+
+/**
+ * Set up Redis buffer state for tests
+ *
+ * @param {string} projectId
+ * @param {object} params
+ * @param {number} params.headVersion
+ * @param {Snapshot} [params.headSnapshot]
+ * @param {number | null} [params.persistedVersion]
+ * @param {number | null} [params.persistTime] - time when the project should be persisted
+ * @param {number | null} [params.expireTime] - time when the project should expire
+ * @param {number} params.changes - number of changes to create
+ * @return {Promise<Change[]>} the dummy changes that have been created
+ */
+async function setupState(projectId, params) {
+ await rclient.set(keySchema.headVersion({ projectId }), params.headVersion)
+ if (params.headSnapshot) {
+ await rclient.set(
+ keySchema.head({ projectId }),
+ JSON.stringify(params.headSnapshot.toRaw())
+ )
+ }
+ if (params.persistedVersion) {
+ await rclient.set(
+ keySchema.persistedVersion({ projectId }),
+ params.persistedVersion
+ )
+ }
+ if (params.persistTime) {
+ await rclient.set(keySchema.persistTime({ projectId }), params.persistTime)
+ }
+ if (params.expireTime) {
+ await rclient.set(keySchema.expireTime({ projectId }), params.expireTime)
+ }
+ const changes = []
+ for (let i = 1; i <= params.changes; i++) {
+ const change = new Change(
+ [new AddFileOperation(`file${i}.tex`, File.createHollow(i, i))],
+ new Date()
+ )
+ changes.push(change)
+ }
+ await rclient.rpush(
+ keySchema.changes({ projectId }),
+ changes.map(change => JSON.stringify(change.toRaw()))
+ )
+ return changes
+}
diff --git a/services/history-v1/test/acceptance/js/storage/expire_redis_chunks.test.js b/services/history-v1/test/acceptance/js/storage/expire_redis_chunks.test.js
new file mode 100644
index 0000000000..f8a5943c43
--- /dev/null
+++ b/services/history-v1/test/acceptance/js/storage/expire_redis_chunks.test.js
@@ -0,0 +1,131 @@
+'use strict'
+
+const { expect } = require('chai')
+const { Author, Change } = require('overleaf-editor-core')
+const cleanup = require('./support/cleanup')
+const { setupProjectState, rclient, keySchema } = require('./support/redis')
+const { runScript } = require('./support/runscript')
+
+const SCRIPT_PATH = 'storage/scripts/expire_redis_chunks.js'
+
+function makeChange() {
+ const timestamp = new Date()
+ const author = new Author(123, 'test@example.com', 'Test User')
+ return new Change([], timestamp, [author])
+}
+
+describe('expire_redis_chunks script', function () {
+ beforeEach(cleanup.everything)
+
+ let now, past, future
+
+ // Set up all projects and run the expire script before each test
+ beforeEach(async function () {
+ now = Date.now()
+ past = now - 10000 // 10 seconds ago
+ future = now + 60000 // 1 minute in the future
+
+ // Set up all project states explicitly
+ await setupProjectState('expired_persisted', {
+ headVersion: 2,
+ persistedVersion: 2,
+ expireTime: past,
+ })
+ await setupProjectState('expired_initial_state', {
+ headVersion: 0,
+ persistedVersion: 0,
+ expireTime: past,
+ })
+ await setupProjectState('expired_persisted_with_job', {
+ headVersion: 2,
+ persistedVersion: 2,
+ expireTime: past,
+ persistTime: future,
+ })
+ await setupProjectState('expired_not_persisted', {
+ headVersion: 3,
+ persistedVersion: 2,
+ expireTime: past,
+ changes: [makeChange()],
+ })
+ await setupProjectState('expired_no_persisted_version', {
+ headVersion: 1,
+ persistedVersion: null,
+ expireTime: past,
+ changes: [makeChange()],
+ })
+ await setupProjectState('future_expired_persisted', {
+ headVersion: 2,
+ persistedVersion: 2,
+ expireTime: future,
+ })
+ await setupProjectState('future_expired_not_persisted', {
+ headVersion: 3,
+ persistedVersion: 2,
+ expireTime: future,
+ changes: [makeChange()],
+ })
+ await setupProjectState('no_expire_time', {
+ headVersion: 1,
+ persistedVersion: 1,
+ expireTime: null,
+ })
+
+ // Run the expire script once after all projects are set up
+ await runScript(SCRIPT_PATH)
+ })
+
+ async function checkProjectStatus(projectId) {
+ const exists =
+ (await rclient.exists(keySchema.headVersion({ projectId }))) === 1
+ return exists ? 'exists' : 'deleted'
+ }
+
+ it('should expire a project when expireTime is past and it is fully persisted', async function () {
+ const projectId = 'expired_persisted'
+ const status = await checkProjectStatus(projectId)
+ expect(status).to.equal('deleted')
+ })
+
+ it('should expire a project when expireTime is past and it has no changes (initial state)', async function () {
+ const projectId = 'expired_initial_state'
+ const status = await checkProjectStatus(projectId)
+ expect(status).to.equal('deleted')
+ })
+
+ it('should expire a project when expireTime is past and it is fully persisted even if persistTime is set', async function () {
+ const projectId = 'expired_persisted_with_job'
+ const status = await checkProjectStatus(projectId)
+ expect(status).to.equal('deleted')
+ })
+
+ it('should not expire a project when expireTime is past but it is not fully persisted', async function () {
+ const projectId = 'expired_not_persisted'
+ const status = await checkProjectStatus(projectId)
+ expect(status).to.equal('exists')
+ })
+
+ it('should not expire a project when expireTime is past but persistedVersion is not set', async function () {
+ const projectId = 'expired_no_persisted_version'
+ const status = await checkProjectStatus(projectId)
+ expect(status).to.equal('exists')
+ })
+
+ it('should not expire a project when expireTime is in the future (even if fully persisted)', async function () {
+ const projectId = 'future_expired_persisted'
+ const status = await checkProjectStatus(projectId)
+ expect(status).to.equal('exists')
+ })
+
+ it('should not expire a project when expireTime is in the future (if not fully persisted)', async function () {
+ const projectId = 'future_expired_not_persisted'
+ const status = await checkProjectStatus(projectId)
+ expect(status).to.equal('exists')
+ })
+
+ it('should not expire a project when expireTime is not set', async function () {
+ const projectId = 'no_expire_time'
+ const status = await checkProjectStatus(projectId)
+ expect(status).to.equal('exists')
+ })
+})
diff --git a/services/history-v1/test/acceptance/js/storage/fixtures/chunks.js b/services/history-v1/test/acceptance/js/storage/fixtures/chunks.js
index 8f67c17d71..0fb50e49e9 100644
--- a/services/history-v1/test/acceptance/js/storage/fixtures/chunks.js
+++ b/services/history-v1/test/acceptance/js/storage/fixtures/chunks.js
@@ -15,7 +15,7 @@ exports.chunks = {
exports.histories = {
chunkOne: {
projectId: DocFixtures.initializedProject.id,
- chunkId: 1000000,
+ chunkId: '1000000',
json: { snapshot: { files: {} }, changes: [] },
},
}
diff --git a/services/history-v1/test/acceptance/js/storage/persist_buffer.test.mjs b/services/history-v1/test/acceptance/js/storage/persist_buffer.test.mjs
new file mode 100644
index 0000000000..64eb4efcb1
--- /dev/null
+++ b/services/history-v1/test/acceptance/js/storage/persist_buffer.test.mjs
@@ -0,0 +1,590 @@
+'use strict'
+
+import fs from 'node:fs'
+import { expect } from 'chai'
+import {
+ Change,
+ Snapshot,
+ File,
+ TextOperation,
+ AddFileOperation,
+ EditFileOperation,
+} from 'overleaf-editor-core'
+import persistBuffer from '../../../../storage/lib/persist_buffer.js'
+import chunkStore from '../../../../storage/lib/chunk_store/index.js'
+import { BlobStore } from '../../../../storage/lib/blob_store/index.js'
+import redisBackend from '../../../../storage/lib/chunk_store/redis.js'
+import persistChanges from '../../../../storage/lib/persist_changes.js'
+import cleanup from './support/cleanup.js'
+import fixtures from './support/fixtures.js'
+import testFiles from './support/test_files.js'
+
+describe('persistBuffer', function () {
+ let projectId
+ const initialVersion = 0
+ let limitsToPersistImmediately
+ let blobStore
+
+ before(function () {
+ const farFuture = new Date()
+ farFuture.setTime(farFuture.getTime() + 7 * 24 * 3600 * 1000)
+ limitsToPersistImmediately = {
+ minChangeTimestamp: farFuture,
+ maxChangeTimestamp: farFuture,
+ maxChunkChanges: 10,
+ }
+ })
+
+ beforeEach(cleanup.everything)
+ beforeEach(fixtures.create)
+
+ beforeEach(async function () {
+ projectId = fixtures.docs.uninitializedProject.id
+ await chunkStore.initializeProject(projectId)
+ blobStore = new BlobStore(projectId)
+ })
+
+ describe('with an empty initial chunk (new project)', function () {
+ it('should persist changes from Redis to a new chunk', async function () {
+ // create an initial snapshot and add `main.tex` with the contents of hello.txt
+ const HELLO_TXT = fs.readFileSync(testFiles.path('hello.txt')).toString()
+
+ const createFile = new Change(
+ [new AddFileOperation('main.tex', File.fromString(HELLO_TXT))],
+ new Date(),
+ []
+ )
+
+ await persistChanges(
+ projectId,
+ [createFile],
+ limitsToPersistImmediately,
+ 0
+ )
+ // Now queue some changes in Redis
+ const op1 = new TextOperation().insert('Hello').retain(HELLO_TXT.length)
+ const change1 = new Change(
+ [new EditFileOperation('main.tex', op1)],
+ new Date()
+ )
+
+ const op2 = new TextOperation()
+ .retain('Hello'.length)
+ .insert(' World')
+ .retain(HELLO_TXT.length)
+ const change2 = new Change(
+ [new EditFileOperation('main.tex', op2)],
+ new Date()
+ )
+
+ const changesToQueue = [change1, change2]
+
+ const finalHeadVersion = initialVersion + 1 + changesToQueue.length
+
+ const now = Date.now()
+ await redisBackend.queueChanges(
+ projectId,
+ new Snapshot(), // dummy snapshot
+ 1,
+ changesToQueue,
+ {
+ persistTime: now + redisBackend.MAX_PERSIST_DELAY_MS,
+ expireTime: now + redisBackend.PROJECT_TTL_MS,
+ }
+ )
+ await redisBackend.setPersistedVersion(projectId, initialVersion)
+
+ // Persist the changes from Redis to the chunk store
+ const persistResult = await persistBuffer(
+ projectId,
+ limitsToPersistImmediately
+ )
+
+ // Check the return value of persistBuffer
+ expect(persistResult).to.exist
+ expect(persistResult).to.have.property('numberOfChangesPersisted')
+ expect(persistResult).to.have.property('originalEndVersion')
+ expect(persistResult).to.have.property('currentChunk')
+ expect(persistResult).to.have.property('resyncNeeded')
+ expect(persistResult.numberOfChangesPersisted).to.equal(
+ changesToQueue.length
+ )
+ expect(persistResult.originalEndVersion).to.equal(initialVersion + 1)
+ expect(persistResult.resyncNeeded).to.be.false
+
+ const latestChunk = await chunkStore.loadLatest(projectId, {
+ persistedOnly: true,
+ })
+ expect(latestChunk).to.exist
+ expect(latestChunk.getStartVersion()).to.equal(initialVersion)
+ expect(latestChunk.getEndVersion()).to.equal(finalHeadVersion)
+ expect(latestChunk.getChanges().length).to.equal(
+ changesToQueue.length + 1
+ )
+ // Check that chunk returned by persistBuffer matches the latest chunk
+ expect(latestChunk).to.deep.equal(persistResult.currentChunk)
+
+ const chunkSnapshot = latestChunk.getSnapshot()
+ expect(Object.keys(chunkSnapshot.getFileMap()).length).to.equal(1)
+
+ const persistedVersionInRedis = (await redisBackend.getState(projectId))
+ .persistedVersion
+ expect(persistedVersionInRedis).to.equal(finalHeadVersion)
+
+ const nonPersisted = await redisBackend.getNonPersistedChanges(
+ projectId,
+ finalHeadVersion
+ )
+ expect(nonPersisted).to.be.an('array').that.is.empty
+ })
+ })
+
+ describe('with an existing chunk and new changes in Redis', function () {
+ it('should persist new changes from Redis, appending to existing history', async function () {
+ const initialContent = 'Initial document content.\n'
+
+ const addInitialFileChange = new Change(
+ [new AddFileOperation('main.tex', File.fromString(initialContent))],
+ new Date(),
+ []
+ )
+
+ await persistChanges(
+ projectId,
+ [addInitialFileChange],
+ limitsToPersistImmediately,
+ initialVersion
+ )
+ const versionAfterInitialSetup = initialVersion + 1 // Now version is 1
+
+ const opForChunk1 = new TextOperation()
+ .retain(initialContent.length)
+ .insert(' First addition.')
+ const changesForChunk1 = [
+ new Change(
+ [new EditFileOperation('main.tex', opForChunk1)],
+ new Date(),
+ []
+ ),
+ ]
+
+ await persistChanges(
+ projectId,
+ changesForChunk1,
+ limitsToPersistImmediately,
+ versionAfterInitialSetup // clientEndVersion after the initial change
+ )
+ // Update persistedChunkEndVersion: 1 (from setup) + 1 (from changesForChunk1) = 2
+ const persistedChunkEndVersion =
+ versionAfterInitialSetup + changesForChunk1.length
+ const contentAfterChunk1 = initialContent + ' First addition.'
+
+ const opVersion2 = new TextOperation()
+ .retain(contentAfterChunk1.length)
+ .insert(' Second addition.')
+ const changeVersion2 = new Change(
+ [new EditFileOperation('main.tex', opVersion2)],
+ new Date(),
+ []
+ )
+
+ const contentAfterChange2 = contentAfterChunk1 + ' Second addition.'
+ const opVersion3 = new TextOperation()
+ .retain(contentAfterChange2.length)
+ .insert(' Third addition.')
+ const changeVersion3 = new Change(
+ [new EditFileOperation('main.tex', opVersion3)],
+ new Date(),
+ []
+ )
+
+ const redisChangesToPush = [changeVersion2, changeVersion3]
+ const finalHeadVersionAfterRedisPush =
+ persistedChunkEndVersion + redisChangesToPush.length
+ const now = Date.now()
+
+ await redisBackend.queueChanges(
+ projectId,
+ new Snapshot(), // dummy snapshot
+ persistedChunkEndVersion,
+ redisChangesToPush,
+ {
+ persistTime: now + redisBackend.MAX_PERSIST_DELAY_MS,
+ expireTime: now + redisBackend.PROJECT_TTL_MS,
+ }
+ )
+ await redisBackend.setPersistedVersion(
+ projectId,
+ persistedChunkEndVersion
+ )
+
+ const persistResult = await persistBuffer(
+ projectId,
+ limitsToPersistImmediately
+ )
+
+ // Check the return value of persistBuffer
+ expect(persistResult).to.exist
+ expect(persistResult).to.have.property('numberOfChangesPersisted')
+ expect(persistResult).to.have.property('originalEndVersion')
+ expect(persistResult).to.have.property('currentChunk')
+ expect(persistResult).to.have.property('resyncNeeded')
+ expect(persistResult.numberOfChangesPersisted).to.equal(
+ redisChangesToPush.length
+ )
+ expect(persistResult.originalEndVersion).to.equal(
+ persistedChunkEndVersion
+ )
+ expect(persistResult.resyncNeeded).to.be.false
+
+ const latestChunk = await chunkStore.loadLatest(projectId, {
+ persistedOnly: true,
+ })
+ expect(latestChunk).to.exist
+ expect(latestChunk.getStartVersion()).to.equal(0)
+ expect(latestChunk.getEndVersion()).to.equal(
+ finalHeadVersionAfterRedisPush
+ )
+ expect(latestChunk.getChanges().length).to.equal(
+ persistedChunkEndVersion + redisChangesToPush.length
+ )
+
+ const persistedVersionInRedisAfter = (
+ await redisBackend.getState(projectId)
+ ).persistedVersion
+ expect(persistedVersionInRedisAfter).to.equal(
+ finalHeadVersionAfterRedisPush
+ )
+
+ // Check that chunk returned by persistBuffer matches the latest chunk
+ expect(persistResult.currentChunk).to.deep.equal(latestChunk)
+
+ const nonPersisted = await redisBackend.getNonPersistedChanges(
+ projectId,
+ finalHeadVersionAfterRedisPush
+ )
+ expect(nonPersisted).to.be.an('array').that.is.empty
+ })
+ })
+
+ describe('when Redis has no new changes', function () {
+ let persistedChunkEndVersion
+ let changesForChunk1
+
+ beforeEach(async function () {
+ const initialContent = 'Content.'
+
+ const addInitialFileChange = new Change(
+ [new AddFileOperation('main.tex', File.fromString(initialContent))],
+ new Date(),
+ []
+ )
+
+ // Persist the initial change; clientEndVersion is initialVersion (0),
+ // which advances the version to 1.
+ await persistChanges(
+ projectId,
+ [addInitialFileChange],
+ limitsToPersistImmediately,
+ initialVersion
+ )
+ const versionAfterInitialSetup = initialVersion + 1 // Now version is 1
+
+ const opForChunk1 = new TextOperation()
+ .retain(initialContent.length)
+ .insert(' More.')
+ changesForChunk1 = [
+ new Change(
+ [new EditFileOperation('main.tex', opForChunk1)],
+ new Date(),
+ []
+ ),
+ ]
+ // clientEndVersion is versionAfterInitialSetup (1)
+ await persistChanges(
+ projectId,
+ changesForChunk1,
+ limitsToPersistImmediately,
+ versionAfterInitialSetup
+ )
+ // Update persistedChunkEndVersion: 1 (from setup) + 1 (from changesForChunk1) = 2
+ persistedChunkEndVersion =
+ versionAfterInitialSetup + changesForChunk1.length
+ })
+
+ it('should leave the persisted version and stored chunks unchanged', async function () {
+ const now = Date.now()
+ await redisBackend.queueChanges(
+ projectId,
+ new Snapshot(),
+ persistedChunkEndVersion - 1,
+ changesForChunk1,
+ {
+ persistTime: now + redisBackend.MAX_PERSIST_DELAY_MS,
+ expireTime: now + redisBackend.PROJECT_TTL_MS,
+ }
+ )
+ await redisBackend.setPersistedVersion(
+ projectId,
+ persistedChunkEndVersion
+ )
+
+ const chunksBefore = await chunkStore.getProjectChunks(projectId)
+
+ const persistResult = await persistBuffer(
+ projectId,
+ limitsToPersistImmediately
+ )
+
+ const currentChunk = await chunkStore.loadLatest(projectId, {
+ persistedOnly: true,
+ })
+ expect(persistResult).to.deep.equal({
+ numberOfChangesPersisted: 0,
+ originalEndVersion: persistedChunkEndVersion,
+ currentChunk,
+ resyncNeeded: false,
+ })
+
+ const chunksAfter = await chunkStore.getProjectChunks(projectId)
+ expect(chunksAfter.length).to.equal(chunksBefore.length)
+ expect(chunksAfter).to.deep.equal(chunksBefore)
+
+ const finalPersistedVersionInRedis = (
+ await redisBackend.getState(projectId)
+ ).persistedVersion
+ expect(finalPersistedVersionInRedis).to.equal(persistedChunkEndVersion)
+ })
+
+ it('should update the persisted version if it is behind the chunk store end version', async function () {
+ const now = Date.now()
+
+ await redisBackend.queueChanges(
+ projectId,
+ new Snapshot(),
+ persistedChunkEndVersion - 1,
+ changesForChunk1,
+ {
+ persistTime: now + redisBackend.MAX_PERSIST_DELAY_MS,
+ expireTime: now + redisBackend.PROJECT_TTL_MS,
+ }
+ )
+ // Force the persisted version in Redis to lag behind the chunk store,
+ // simulating the situation where a worker has persisted changes to the
+ // chunk store but failed to update the version in redis.
+ await redisBackend.setPersistedVersion(
+ projectId,
+ persistedChunkEndVersion - 1
+ )
+
+ const chunksBefore = await chunkStore.getProjectChunks(projectId)
+
+ // Persist buffer (which should do nothing as there are no new changes)
+ const persistResult = await persistBuffer(
+ projectId,
+ limitsToPersistImmediately
+ )
+
+ // Check the return value
+ const currentChunk = await chunkStore.loadLatest(projectId, {
+ persistedOnly: true,
+ })
+ expect(persistResult).to.deep.equal({
+ numberOfChangesPersisted: 0,
+ originalEndVersion: persistedChunkEndVersion,
+ currentChunk,
+ resyncNeeded: false,
+ })
+
+ const chunksAfter = await chunkStore.getProjectChunks(projectId)
+ expect(chunksAfter.length).to.equal(chunksBefore.length)
+ expect(chunksAfter).to.deep.equal(chunksBefore)
+
+ const finalPersistedVersionInRedis = (
+ await redisBackend.getState(projectId)
+ ).persistedVersion
+ expect(finalPersistedVersionInRedis).to.equal(persistedChunkEndVersion)
+ })
+ })
+
+ describe('when limits restrict the number of changes to persist', function () {
+ it('should persist only a subset of changes and update persistedVersion accordingly', async function () {
+ const now = Date.now()
+ const oneDayAgo = now - 1000 * 60 * 60 * 24
+ const oneHourAgo = now - 1000 * 60 * 60
+ const twoHoursAgo = now - 1000 * 60 * 60 * 2
+ const threeHoursAgo = now - 1000 * 60 * 60 * 3
+
+ // Create an initial file with some content
+ const initialContent = 'Initial content.'
+ const addInitialFileChange = new Change(
+ [new AddFileOperation('main.tex', File.fromString(initialContent))],
+ new Date(oneDayAgo),
+ []
+ )
+
+ await persistChanges(
+ projectId,
+ [addInitialFileChange],
+ limitsToPersistImmediately,
+ initialVersion
+ )
+ const versionAfterInitialSetup = initialVersion + 1 // Version is 1
+
+ // Queue three additional changes in Redis
+ const op1 = new TextOperation()
+ .retain(initialContent.length)
+ .insert(' Change 1.')
+ const change1 = new Change(
+ [new EditFileOperation('main.tex', op1)],
+ new Date(threeHoursAgo)
+ )
+ const contentAfterC1 = initialContent + ' Change 1.'
+
+ const op2 = new TextOperation()
+ .retain(contentAfterC1.length)
+ .insert(' Change 2.')
+ const change2 = new Change(
+ [new EditFileOperation('main.tex', op2)],
+ new Date(twoHoursAgo)
+ )
+ const contentAfterC2 = contentAfterC1 + ' Change 2.'
+
+ const op3 = new TextOperation()
+ .retain(contentAfterC2.length)
+ .insert(' Change 3.')
+ const change3 = new Change(
+ [new EditFileOperation('main.tex', op3)],
+ new Date(oneHourAgo)
+ )
+
+ const changesToQueue = [change1, change2, change3]
+ await redisBackend.queueChanges(
+ projectId,
+ new Snapshot(), // dummy snapshot
+ versionAfterInitialSetup, // startVersion for queued changes
+ changesToQueue,
+ {
+ persistTime: now + redisBackend.MAX_PERSIST_DELAY_MS,
+ expireTime: now + redisBackend.PROJECT_TTL_MS,
+ }
+ )
+ await redisBackend.setPersistedVersion(
+ projectId,
+ versionAfterInitialSetup
+ )
+
+ // Define limits that persist only 2 of the queued changes (on top of the
+ // initial file creation), leaving the final change (change3) in the Redis buffer.
+ const restrictiveLimits = {
+ minChangeTimestamp: new Date(oneHourAgo), // only changes older than 1 hour are eligible for persistence
+ maxChangeTimestamp: new Date(twoHoursAgo), // persistence is forced once any change is older than 2 hours
+ }
+
+ const persistResult = await persistBuffer(projectId, restrictiveLimits)
+
+ // Check the return value of persistBuffer
+ expect(persistResult).to.exist
+ expect(persistResult).to.have.property('numberOfChangesPersisted')
+ expect(persistResult).to.have.property('originalEndVersion')
+ expect(persistResult).to.have.property('currentChunk')
+ expect(persistResult).to.have.property('resyncNeeded')
+ expect(persistResult.numberOfChangesPersisted).to.equal(2) // change1 + change2
+ expect(persistResult.originalEndVersion).to.equal(
+ versionAfterInitialSetup
+ )
+ expect(persistResult.resyncNeeded).to.be.false
+
+ // Check the latest persisted chunk, it should only have the initial file and the first two changes
+ const latestChunk = await chunkStore.loadLatest(projectId, {
+ persistedOnly: true,
+ })
+ expect(latestChunk).to.exist
+ expect(latestChunk.getChanges().length).to.equal(3) // addInitialFileChange + change1 + change2
+ expect(latestChunk.getStartVersion()).to.equal(initialVersion)
+ const expectedEndVersion = versionAfterInitialSetup + 2 // Persisted two changes from the queue
+ expect(latestChunk.getEndVersion()).to.equal(expectedEndVersion)
+
+ // Check that chunk returned by persistBuffer matches the latest chunk
+ expect(persistResult.currentChunk).to.deep.equal(latestChunk)
+
+ // Check persisted version in Redis
+ const state = await redisBackend.getState(projectId)
+ expect(state.persistedVersion).to.equal(expectedEndVersion)
+
+ // Check non-persisted changes in Redis
+ const nonPersisted = await redisBackend.getNonPersistedChanges(
+ projectId,
+ expectedEndVersion
+ )
+ expect(nonPersisted).to.be.an('array').with.lengthOf(1) // change3 should remain
+ expect(nonPersisted).to.deep.equal([change3])
+ })
+ })
+
+ describe('with lots of changes to persist', function () {
+ it('should persist all changes', async function () {
+ const changes = []
+ // Create an initial file with some content
+ const blob = await blobStore.putString('')
+ changes.push(
+ new Change(
+ [new AddFileOperation('main.tex', File.createLazyFromBlobs(blob))],
+ new Date(),
+ []
+ )
+ )
+
+ for (let i = 0; i < 500; i++) {
+ const op = new TextOperation().retain(i).insert('x')
+ changes.push(
+ new Change([new EditFileOperation('main.tex', op)], new Date())
+ )
+ }
+
+ const now = Date.now()
+ await redisBackend.queueChanges(
+ projectId,
+ new Snapshot(), // dummy snapshot
+ 0, // startVersion for queued changes
+ changes,
+ {
+ persistTime: now + redisBackend.MAX_PERSIST_DELAY_MS,
+ expireTime: now + redisBackend.PROJECT_TTL_MS,
+ }
+ )
+
+ const expectedEndVersion = 501
+ const persistResult = await persistBuffer(
+ projectId,
+ limitsToPersistImmediately
+ )
+ expect(persistResult.numberOfChangesPersisted).to.equal(
+ expectedEndVersion
+ )
+ expect(persistResult.originalEndVersion).to.equal(0)
+ expect(persistResult.resyncNeeded).to.be.false
+
+ // Check the latest persisted chunk
+ const latestChunk = await chunkStore.loadLatest(projectId, {
+ persistedOnly: true,
+ })
+ expect(latestChunk).to.exist
+ expect(latestChunk.getEndVersion()).to.equal(expectedEndVersion)
+
+ // Check that chunk returned by persistBuffer matches the latest chunk
+ expect(persistResult.currentChunk).to.deep.equal(latestChunk)
+
+ // Check persisted version in Redis
+ const state = await redisBackend.getState(projectId)
+ expect(state.persistedVersion).to.equal(expectedEndVersion)
+
+ // Check non-persisted changes in Redis
+ const nonPersisted = await redisBackend.getNonPersistedChanges(
+ projectId,
+ expectedEndVersion
+ )
+ expect(nonPersisted).to.deep.equal([])
+ })
+ })
+})
diff --git a/services/history-v1/test/acceptance/js/storage/persist_changes.test.js b/services/history-v1/test/acceptance/js/storage/persist_changes.test.js
index d661007c39..4c491db4a3 100644
--- a/services/history-v1/test/acceptance/js/storage/persist_changes.test.js
+++ b/services/history-v1/test/acceptance/js/storage/persist_changes.test.js
@@ -1,5 +1,6 @@
'use strict'
+const { createHash } = require('node:crypto')
const { expect } = require('chai')
const cleanup = require('./support/cleanup')
@@ -11,6 +12,8 @@ const persistChanges = storage.persistChanges
const core = require('overleaf-editor-core')
const AddFileOperation = core.AddFileOperation
+const EditFileOperation = core.EditFileOperation
+const TextOperation = core.TextOperation
const Change = core.Change
const Chunk = core.Chunk
const File = core.File
@@ -28,7 +31,7 @@ describe('persistChanges', function () {
farFuture.setTime(farFuture.getTime() + 7 * 24 * 3600 * 1000)
})
- it('persists changes', function () {
+ it('persists changes', async function () {
const limitsToPersistImmediately = {
minChangeTimestamp: farFuture,
maxChangeTimestamp: farFuture,
@@ -41,29 +44,30 @@ describe('persistChanges', function () {
)
const changes = [change]
- return chunkStore
- .initializeProject(projectId)
- .then(() => {
- return persistChanges(projectId, changes, limitsToPersistImmediately, 0)
- })
- .then(result => {
- const history = new History(new Snapshot(), changes)
- const currentChunk = new Chunk(history, 0)
- expect(result).to.deep.equal({
- numberOfChangesPersisted: 1,
- originalEndVersion: 0,
- currentChunk,
- })
- return chunkStore.loadLatest(projectId)
- })
- .then(chunk => {
- expect(chunk.getStartVersion()).to.equal(0)
- expect(chunk.getEndVersion()).to.equal(1)
- expect(chunk.getChanges().length).to.equal(1)
- })
+ await chunkStore.initializeProject(projectId)
+ const result = await persistChanges(
+ projectId,
+ changes,
+ limitsToPersistImmediately,
+ 0
+ )
+
+ const history = new History(new Snapshot(), changes)
+ const currentChunk = new Chunk(history, 0)
+ expect(result).to.deep.equal({
+ numberOfChangesPersisted: 1,
+ originalEndVersion: 0,
+ currentChunk,
+ resyncNeeded: false,
+ })
+
+ const chunk = await chunkStore.loadLatest(projectId)
+ expect(chunk.getStartVersion()).to.equal(0)
+ expect(chunk.getEndVersion()).to.equal(1)
+ expect(chunk.getChanges().length).to.equal(1)
})
- it('persists changes in two chunks', function () {
+ it('persists changes in three chunks', async function () {
const limitsToPersistImmediately = {
maxChunkChanges: 1,
minChangeTimestamp: farFuture,
@@ -80,38 +84,47 @@ describe('persistChanges', function () {
new Date(),
[]
)
- const changes = [firstChange, secondChange]
+ const thirdChange = new Change(
+ [new AddFileOperation('c.tex', File.fromString(''))],
+ new Date(),
+ []
+ )
+ const changes = [firstChange, secondChange, thirdChange]
- return chunkStore
- .initializeProject(projectId)
- .then(() => {
- return persistChanges(projectId, changes, limitsToPersistImmediately, 0)
- })
- .then(result => {
- const snapshot = Snapshot.fromRaw({
- files: {
- 'a.tex': {
- content: '',
- },
- },
- })
- const history = new History(snapshot, [secondChange])
- const currentChunk = new Chunk(history, 1)
- expect(result).to.deep.equal({
- numberOfChangesPersisted: 2,
- originalEndVersion: 0,
- currentChunk,
- })
- return chunkStore.loadLatest(projectId)
- })
- .then(chunk => {
- expect(chunk.getStartVersion()).to.equal(1)
- expect(chunk.getEndVersion()).to.equal(2)
- expect(chunk.getChanges().length).to.equal(1)
- })
+ await chunkStore.initializeProject(projectId)
+ const result = await persistChanges(
+ projectId,
+ changes,
+ limitsToPersistImmediately,
+ 0
+ )
+
+ const snapshot = Snapshot.fromRaw({
+ files: {
+ 'a.tex': {
+ content: '',
+ },
+ 'b.tex': {
+ content: '',
+ },
+ },
+ })
+ const history = new History(snapshot, [thirdChange])
+ const currentChunk = new Chunk(history, 2)
+ expect(result).to.deep.equal({
+ numberOfChangesPersisted: 3,
+ originalEndVersion: 0,
+ currentChunk,
+ resyncNeeded: false,
+ })
+
+ const chunk = await chunkStore.loadLatest(projectId)
+ expect(chunk.getStartVersion()).to.equal(2)
+ expect(chunk.getEndVersion()).to.equal(3)
+ expect(chunk.getChanges().length).to.equal(1)
})
- it('persists the snapshot at the start of the chunk', function () {
+ it('persists the snapshot at the start of the chunk', async function () {
const limitsToPersistImmediately = {
maxChunkChanges: 2,
minChangeTimestamp: farFuture,
@@ -130,29 +143,30 @@ describe('persistChanges', function () {
)
const changes = [firstChange, secondChange]
- return chunkStore
- .initializeProject(projectId)
- .then(() => {
- return persistChanges(projectId, changes, limitsToPersistImmediately, 0)
- })
- .then(result => {
- const history = new History(new Snapshot(), changes)
- const currentChunk = new Chunk(history, 0)
- expect(result).to.deep.equal({
- numberOfChangesPersisted: 2,
- originalEndVersion: 0,
- currentChunk,
- })
- return chunkStore.loadLatest(projectId)
- })
- .then(chunk => {
- expect(chunk.getStartVersion()).to.equal(0)
- expect(chunk.getEndVersion()).to.equal(2)
- expect(chunk.getChanges().length).to.equal(2)
- })
+ await chunkStore.initializeProject(projectId)
+ const result = await persistChanges(
+ projectId,
+ changes,
+ limitsToPersistImmediately,
+ 0
+ )
+
+ const history = new History(new Snapshot(), changes)
+ const currentChunk = new Chunk(history, 0)
+ expect(result).to.deep.equal({
+ numberOfChangesPersisted: 2,
+ originalEndVersion: 0,
+ currentChunk,
+ resyncNeeded: false,
+ })
+
+ const chunk = await chunkStore.loadLatest(projectId)
+ expect(chunk.getStartVersion()).to.equal(0)
+ expect(chunk.getEndVersion()).to.equal(2)
+ expect(chunk.getChanges().length).to.equal(2)
})
- it("errors if the version doesn't match the latest chunk", function () {
+ it("errors if the version doesn't match the latest chunk", async function () {
const limitsToPersistImmediately = {
minChangeTimestamp: farFuture,
maxChangeTimestamp: farFuture,
@@ -169,18 +183,82 @@ describe('persistChanges', function () {
[]
)
const changes = [firstChange, secondChange]
- return chunkStore
- .initializeProject(projectId)
- .then(() => {
- return persistChanges(projectId, changes, limitsToPersistImmediately, 1)
- })
- .then(() => {
- expect.fail()
- })
- .catch(err => {
- expect(err.message).to.equal(
- 'client sent updates with end_version 1 but latest chunk has end_version 0'
- )
- })
+
+ await chunkStore.initializeProject(projectId)
+ await expect(
+ persistChanges(projectId, changes, limitsToPersistImmediately, 1)
+ ).to.be.rejectedWith(
+ 'client sent updates with end_version 1 but latest chunk has end_version 0'
+ )
+ })
+
+ describe('content hash validation', function () {
+ it('accepts a change with a valid hash', async function () {
+ const limitsToPersistImmediately = {
+ minChangeTimestamp: farFuture,
+ maxChangeTimestamp: farFuture,
+ }
+
+ const projectId = fixtures.docs.uninitializedProject.id
+ await chunkStore.initializeProject(projectId)
+ const textOperation = new TextOperation()
+ textOperation.insert('hello ')
+ textOperation.retain(5)
+ textOperation.contentHash = hashString('hello world')
+ const change = new Change(
+ [
+ new AddFileOperation('a.tex', File.fromString('world')),
+ new EditFileOperation('a.tex', textOperation),
+ ],
+ new Date(),
+ []
+ )
+ const changes = [change]
+
+ const result = await persistChanges(
+ projectId,
+ changes,
+ limitsToPersistImmediately,
+ 0
+ )
+ expect(result.numberOfChangesPersisted).to.equal(1)
+ })
+
+ it('turns on the resyncNeeded flag if content hash validation fails', async function () {
+ const limitsToPersistImmediately = {
+ minChangeTimestamp: farFuture,
+ maxChangeTimestamp: farFuture,
+ }
+
+ const projectId = fixtures.docs.uninitializedProject.id
+ await chunkStore.initializeProject(projectId)
+ const textOperation = new TextOperation()
+ textOperation.insert('hello ')
+ textOperation.retain(5)
+ textOperation.contentHash = hashString('bad hash')
+ const change = new Change(
+ [
+ new AddFileOperation('a.tex', File.fromString('world')),
+ new EditFileOperation('a.tex', textOperation),
+ ],
+ new Date(),
+ []
+ )
+ const changes = [change]
+
+ const result = await persistChanges(
+ projectId,
+ changes,
+ limitsToPersistImmediately,
+ 0
+ )
+ expect(result.resyncNeeded).to.be.true
+ })
})
})
+
+function hashString(s) {
+ const hash = createHash('sha-1')
+ hash.update(s)
+ return hash.digest('hex')
+}
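
Note: the content hash exercised by these tests is simply a hex-encoded SHA-1 of the file content after the edit has been applied, and a mismatch sets resyncNeeded rather than rejecting the change. A minimal standalone sketch of that check (checkContentHash is an illustrative name, not the actual persist_changes implementation):

const { createHash } = require('node:crypto')

// Hex-encoded SHA-1 of a string, matching hashString() in the test above.
function sha1Hex(s) {
  return createHash('sha1').update(s).digest('hex')
}

// Illustrative check: a bad hash does not reject the change, it only flags
// that the client should resync (cf. resyncNeeded in the tests above).
function checkContentHash(contentAfterEdit, claimedHash) {
  if (claimedHash == null) return { resyncNeeded: false }
  return { resyncNeeded: sha1Hex(contentAfterEdit) !== claimedHash }
}

console.log(checkContentHash('hello world', sha1Hex('hello world'))) // { resyncNeeded: false }
console.log(checkContentHash('hello world', sha1Hex('bad hash'))) // { resyncNeeded: true }
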
diff --git a/services/history-v1/test/acceptance/js/storage/persist_redis_chunks.test.js b/services/history-v1/test/acceptance/js/storage/persist_redis_chunks.test.js
new file mode 100644
index 0000000000..58261703bb
--- /dev/null
+++ b/services/history-v1/test/acceptance/js/storage/persist_redis_chunks.test.js
@@ -0,0 +1,262 @@
+'use strict'
+
+const { expect } = require('chai')
+const {
+ Change,
+ AddFileOperation,
+ EditFileOperation,
+ TextOperation,
+ File,
+} = require('overleaf-editor-core')
+const cleanup = require('./support/cleanup')
+const fixtures = require('./support/fixtures')
+const chunkStore = require('../../../../storage/lib/chunk_store')
+const { getState } = require('../../../../storage/lib/chunk_store/redis')
+const { setupProjectState } = require('./support/redis')
+const { runScript } = require('./support/runscript')
+const persistChanges = require('../../../../storage/lib/persist_changes')
+
+const SCRIPT_PATH = 'storage/scripts/persist_redis_chunks.mjs'
+
+describe('persist_redis_chunks script', function () {
+ before(cleanup.everything)
+
+ let now, past, future
+ let projectIdsStore // To store the generated project IDs, keyed by scenario name
+ let limitsToPersistImmediately
+
+ before(async function () {
+ const farFuture = new Date()
+ farFuture.setTime(farFuture.getTime() + 7 * 24 * 3600 * 1000)
+ limitsToPersistImmediately = {
+ minChangeTimestamp: farFuture,
+ maxChangeTimestamp: farFuture,
+ maxChunkChanges: 100, // Allow enough changes for setup
+ }
+
+ await fixtures.create()
+
+ now = Date.now()
+ past = now - 10000 // 10 seconds ago
+ future = now + 60000 // 1 minute in the future
+
+ projectIdsStore = {}
+
+ // Scenario 1: project_due_for_persistence
+ // Goal: Has initial persisted content (v1), Redis has new changes (v1->v2) due for persistence.
+ // Expected: Script persists Redis changes, persistedVersion becomes 2.
+ {
+ const dueProjectId = await chunkStore.initializeProject()
+ projectIdsStore.project_due_for_persistence = dueProjectId
+ const initialContent = 'Initial content for due project.'
+ const initialChange = new Change(
+ [new AddFileOperation('main.tex', File.fromString(initialContent))],
+ new Date(now - 30000), // 30 seconds ago
+ []
+ )
+ await persistChanges(
+ dueProjectId,
+ [initialChange],
+ limitsToPersistImmediately,
+ 0
+ )
+ const secondChangeDue = new Change(
+ [
+ new EditFileOperation(
+ 'main.tex',
+ new TextOperation()
+ .retain(initialContent.length)
+ .insert(' More content.')
+ ),
+ ],
+ new Date(now - 20000), // 20 seconds ago
+ []
+ )
+ await setupProjectState(dueProjectId, {
+ persistTime: past,
+ headVersion: 2, // After secondChangeDue
+ persistedVersion: 1, // Initial content is at v1
+ changes: [secondChangeDue], // New changes in Redis (v1->v2)
+ expireTimeFuture: true,
+ })
+ }
+
+ // Scenario 2: project_not_due_for_persistence
+ // Goal: Has initial persisted content (v1), Redis has no new changes, not due.
+ // Expected: Script does nothing, persistedVersion remains 1.
+ {
+ const notDueProjectId = await chunkStore.initializeProject()
+ projectIdsStore.project_not_due_for_persistence = notDueProjectId
+ const initialContent = 'Initial content for not_due project.'
+ const initialChange = new Change(
+ [new AddFileOperation('main.tex', File.fromString(initialContent))],
+ new Date(now - 30000), // 30 seconds ago
+ []
+ )
+ await persistChanges(
+ notDueProjectId,
+ [initialChange],
+ limitsToPersistImmediately,
+ 0
+ ) // Persisted: v0 -> v1
+ await setupProjectState(notDueProjectId, {
+ persistTime: future,
+ headVersion: 1, // Matches persisted version
+ persistedVersion: 1,
+ changes: [], // No new changes in Redis
+ expireTimeFuture: true,
+ })
+ }
+
+ // Scenario 3: project_no_persist_time
+ // Goal: Has initial persisted content (v1), Redis has no new changes, no persistTime.
+ // Expected: Script does nothing, persistedVersion remains 1.
+ {
+ const noPersistTimeProjectId = await chunkStore.initializeProject()
+ projectIdsStore.project_no_persist_time = noPersistTimeProjectId
+ const initialContent = 'Initial content for no_persist_time project.'
+ const initialChange = new Change(
+ [new AddFileOperation('main.tex', File.fromString(initialContent))],
+ new Date(now - 30000), // 30 seconds ago
+ []
+ )
+ await persistChanges(
+ noPersistTimeProjectId,
+ [initialChange],
+ limitsToPersistImmediately,
+ 0
+ ) // Persisted: v0 -> v1
+ await setupProjectState(noPersistTimeProjectId, {
+ persistTime: null,
+ headVersion: 1, // Matches persisted version
+ persistedVersion: 1,
+ changes: [], // No new changes in Redis
+ expireTimeFuture: true,
+ })
+ }
+
+ // Scenario 4: project_due_fully_persisted
+ // Goal: Has content persisted up to v2, Redis reflects this (head=2, persisted=2), due for check.
+ // Expected: Script clears persistTime, persistedVersion remains 2.
+ {
+ const dueFullyPersistedId = await chunkStore.initializeProject()
+ projectIdsStore.project_due_fully_persisted = dueFullyPersistedId
+ const initialContent = 'Content part 1 for fully persisted.'
+ const change1 = new Change(
+ [new AddFileOperation('main.tex', File.fromString(initialContent))],
+ new Date(now - 40000), // 40 seconds ago
+ []
+ )
+ const change2 = new Change(
+ [
+ new EditFileOperation(
+ 'main.tex',
+ new TextOperation()
+ .retain(initialContent.length)
+ .insert(' Content part 2.')
+ ),
+ ],
+ new Date(now - 30000), // 30 seconds ago
+ []
+ )
+ await persistChanges(
+ dueFullyPersistedId,
+ [change1, change2],
+ limitsToPersistImmediately,
+ 0
+ )
+ await setupProjectState(dueFullyPersistedId, {
+ persistTime: past,
+ headVersion: 2,
+ persistedVersion: 2,
+ changes: [], // No new unpersisted changes in Redis
+ expireTimeFuture: true,
+ })
+ }
+
+ // Scenario 5: project_fails_to_persist
+ // Goal: Has initial persisted content (v1), Redis has new changes (v1->v2) due for persistence, but these changes will cause an error.
+ // Expected: Script attempts to persist, fails, and persistTime is NOT cleared.
+ {
+ const failsToPersistProjectId = await chunkStore.initializeProject()
+ projectIdsStore.project_fails_to_persist = failsToPersistProjectId
+ const initialContent = 'Initial content for failure case.'
+ const initialChange = new Change(
+ [new AddFileOperation('main.tex', File.fromString(initialContent))],
+ new Date(now - 30000), // 30 seconds ago
+ []
+ )
+ await persistChanges(
+ failsToPersistProjectId,
+ [initialChange],
+ limitsToPersistImmediately,
+ 0
+ )
+ // This change will fail because it retains 1000 characters, which is longer
+ // than the initial content of the file.
+ const conflictingChange = new Change(
+ [
+ new EditFileOperation(
+ 'main.tex',
+ new TextOperation().retain(1000).insert('This will fail.')
+ ),
+ ],
+ new Date(now - 20000), // 20 seconds ago
+ []
+ )
+ await setupProjectState(failsToPersistProjectId, {
+ persistTime: past, // Due for persistence
+ headVersion: 2, // After conflictingChange
+ persistedVersion: 1, // Initial content is at v1
+ changes: [conflictingChange], // New changes in Redis (v1->v2)
+ expireTimeFuture: true,
+ })
+ }
+
+ await runScript(SCRIPT_PATH)
+ })
+
+ describe('when the buffer has new changes', function () {
+ it('should update persisted-version when the persist-time is in the past', async function () {
+ const projectId = projectIdsStore.project_due_for_persistence
+ const state = await getState(projectId)
+ expect(state.persistTime).to.be.null
+ expect(state.persistedVersion).to.equal(2)
+ })
+
+ it('should not perform any operations when the persist-time is in the future', async function () {
+ const projectId = projectIdsStore.project_not_due_for_persistence
+ const state = await getState(projectId)
+ expect(state.persistTime).to.equal(future)
+ expect(state.persistedVersion).to.equal(1)
+ })
+ })
+
+ describe('when the changes in the buffer are already persisted', function () {
+ it('should delete persist-time for a project when the persist-time is in the past', async function () {
+ const projectId = projectIdsStore.project_due_fully_persisted
+ const state = await getState(projectId)
+ expect(state.persistTime).to.be.null
+ expect(state.persistedVersion).to.equal(2)
+ })
+ })
+
+ describe('when there is no persist-time set', function () {
+ it('should not change redis when there is no persist-time set initially', async function () {
+ const projectId = projectIdsStore.project_no_persist_time
+ const state = await getState(projectId)
+ expect(state.persistTime).to.be.null
+ expect(state.persistedVersion).to.equal(1)
+ })
+ })
+
+ describe('when persistence fails due to conflicting changes', function () {
+ it('should not clear persist-time and not update persisted-version', async function () {
+ const projectId = projectIdsStore.project_fails_to_persist
+ const state = await getState(projectId)
+ expect(state.persistTime).to.be.greaterThan(now) // persistTime should be pushed to the future by RETRY_DELAY_MS
+ expect(state.persistedVersion).to.equal(1) // persistedVersion should not change
+ })
+ })
+})
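
The scenarios above boil down to one decision per project. As a hedged summary only (the real logic lives in persist_redis_chunks.mjs and the chunk store, not here):

// Illustrative per-project decision, mirroring the scenarios above
// (this is not the actual persist_redis_chunks.mjs implementation).
function decideAction({ persistTime, persistedVersion, headVersion }, now = Date.now()) {
  if (persistTime == null || persistTime > now) return 'skip'
  if (persistedVersion !== null && persistedVersion >= headVersion) {
    return 'clear-persist-time' // nothing left to persist
  }
  return 'persist-changes' // on failure, persistTime is pushed into the future instead
}

console.log(decideAction({ persistTime: Date.now() - 1, persistedVersion: 1, headVersion: 2 })) // 'persist-changes'
console.log(decideAction({ persistTime: Date.now() + 60000, persistedVersion: 1, headVersion: 1 })) // 'skip'
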
diff --git a/services/history-v1/test/acceptance/js/storage/queue_changes.test.js b/services/history-v1/test/acceptance/js/storage/queue_changes.test.js
new file mode 100644
index 0000000000..dbfe8c7e56
--- /dev/null
+++ b/services/history-v1/test/acceptance/js/storage/queue_changes.test.js
@@ -0,0 +1,416 @@
+'use strict'
+
+const { expect } = require('chai')
+const sinon = require('sinon')
+
+const cleanup = require('./support/cleanup')
+const fixtures = require('./support/fixtures')
+const testFiles = require('./support/test_files.js')
+const storage = require('../../../../storage')
+const chunkStore = storage.chunkStore
+const queueChanges = storage.queueChanges
+const redisBackend = require('../../../../storage/lib/chunk_store/redis')
+
+const core = require('overleaf-editor-core')
+const AddFileOperation = core.AddFileOperation
+const EditFileOperation = core.EditFileOperation
+const TextOperation = core.TextOperation
+const Change = core.Change
+const Chunk = core.Chunk
+const File = core.File
+const Snapshot = core.Snapshot
+const BlobStore = storage.BlobStore
+const persistChanges = storage.persistChanges
+
+describe('queueChanges', function () {
+ let limitsToPersistImmediately
+ before(function () {
+ // Used to provide a limit which forces us to persist all of the changes
+ const farFuture = new Date()
+ farFuture.setTime(farFuture.getTime() + 7 * 24 * 3600 * 1000)
+ limitsToPersistImmediately = {
+ minChangeTimestamp: farFuture,
+ maxChangeTimestamp: farFuture,
+ maxChanges: 10,
+ maxChunkChanges: 10,
+ }
+ })
+
+ beforeEach(cleanup.everything)
+ beforeEach(fixtures.create)
+ afterEach(function () {
+ sinon.restore()
+ })
+
+ it('queues changes when redis has no snapshot (falls back to chunkStore with an empty chunk)', async function () {
+ // Start with an empty chunk store for the project
+ const projectId = fixtures.docs.uninitializedProject.id
+ await chunkStore.initializeProject(projectId)
+
+ // Ensure that the initial state in redis is empty
+ const initialRedisState = await redisBackend.getState(projectId)
+ expect(initialRedisState.headVersion).to.be.null
+ expect(initialRedisState.headSnapshot).to.be.null
+ expect(initialRedisState.changes).to.be.an('array').that.is.empty
+
+ // Add a test file to the blob store
+ const blobStore = new BlobStore(projectId)
+ await blobStore.putFile(testFiles.path('hello.txt'))
+
+ // Prepare an initial change to add a single file to an empty project
+ const change = new Change(
+ [
+ new AddFileOperation(
+ 'test.tex',
+ File.fromHash(testFiles.HELLO_TXT_HASH)
+ ),
+ ],
+ new Date(),
+ []
+ )
+ const changesToQueue = [change]
+ const endVersion = 0
+
+ // Queue the changes to add the test file
+ const status = await queueChanges(projectId, changesToQueue, endVersion)
+ expect(status).to.equal('ok')
+
+ // Verify that we now have some state in redis
+ const redisState = await redisBackend.getState(projectId)
+ expect(redisState).to.not.be.null
+
+ // Compute the expected snapshot after applying the changes
+ const expectedSnapshot = new Snapshot()
+ await expectedSnapshot.loadFiles('hollow', blobStore)
+ for (const change of changesToQueue) {
+ const hollowChange = change.clone()
+ await hollowChange.loadFiles('hollow', blobStore)
+ hollowChange.applyTo(expectedSnapshot, { strict: true })
+ }
+
+ // Confirm that state in redis matches the expected snapshot and changes queue
+ const expectedVersionInRedis = endVersion + changesToQueue.length
+ expect(redisState.headVersion).to.equal(expectedVersionInRedis)
+ expect(redisState.headSnapshot).to.deep.equal(expectedSnapshot.toRaw())
+ expect(redisState.changes).to.deep.equal(changesToQueue.map(c => c.toRaw()))
+ })
+
+ it('queues changes when redis has no snapshot (falls back to chunkStore with an existing chunk)', async function () {
+ const projectId = fixtures.docs.uninitializedProject.id
+
+ // Initialise the project in the chunk store using the "Hello World" test file
+ await chunkStore.initializeProject(projectId)
+ const blobStore = new BlobStore(projectId)
+ await blobStore.putFile(testFiles.path('hello.txt'))
+ const change = new Change(
+ [
+ new AddFileOperation(
+ 'hello.tex',
+ File.fromHash(testFiles.HELLO_TXT_HASH)
+ ),
+ ],
+ new Date(),
+ []
+ )
+ const initialChanges = [change]
+ const initialVersion = 0
+
+ const result = await persistChanges(
+ projectId,
+ initialChanges,
+ limitsToPersistImmediately,
+ initialVersion
+ )
+ // Compute the state after the initial changes are persisted for later comparison
+ const endVersion = initialVersion + initialChanges.length
+ const { currentChunk } = result
+ const originalSnapshot = result.currentChunk.getSnapshot()
+ await originalSnapshot.loadFiles('hollow', blobStore)
+ originalSnapshot.applyAll(currentChunk.getChanges())
+
+ // Ensure that the initial state in redis is empty
+ const initialRedisState = await redisBackend.getState(projectId)
+ expect(initialRedisState.headVersion).to.be.null
+ expect(initialRedisState.headSnapshot).to.be.null
+ expect(initialRedisState.changes).to.be.an('array').that.is.empty
+
+ // Prepare a change to edit the existing file
+ const editFileOp = new EditFileOperation(
+ 'hello.tex',
+ new TextOperation()
+ .insert('Hello')
+ .retain(testFiles.HELLO_TXT_UTF8_LENGTH)
+ )
+ const editFileChange = new Change([editFileOp], new Date(), [])
+ const changesToQueue = [editFileChange]
+
+ // Queue the changes to edit the existing file
+ const status = await queueChanges(projectId, changesToQueue, endVersion)
+ expect(status).to.equal('ok')
+
+ // Verify that we now have some state in redis
+ const redisState = await redisBackend.getState(projectId)
+ expect(redisState).to.not.be.null
+
+ // Compute the expected snapshot after applying the changes
+ const expectedSnapshot = originalSnapshot.clone()
+ await expectedSnapshot.loadFiles('hollow', blobStore)
+ expectedSnapshot.applyAll(changesToQueue)
+
+ // Confirm that state in redis matches the expected snapshot and changes queue
+ const expectedVersionInRedis = endVersion + changesToQueue.length
+ expect(redisState.headVersion).to.equal(expectedVersionInRedis)
+ expect(redisState.headSnapshot).to.deep.equal(expectedSnapshot.toRaw())
+ expect(redisState.changes).to.deep.equal(changesToQueue.map(c => c.toRaw()))
+ })
+
+ it('queues changes when redis has a snapshot with existing changes', async function () {
+ const projectId = fixtures.docs.uninitializedProject.id
+
+ // Initialise the project in redis using the "Hello World" test file
+ await chunkStore.initializeProject(projectId)
+ const blobStore = new BlobStore(projectId)
+ await blobStore.putFile(testFiles.path('hello.txt'))
+ const initialChangeOp = new AddFileOperation(
+ 'existing.tex',
+ File.fromHash(testFiles.HELLO_TXT_HASH)
+ )
+ const initialChange = new Change([initialChangeOp], new Date(), [])
+ const initialChangesToQueue = [initialChange]
+ const versionBeforeInitialQueue = 0
+
+ // Queue the initial changes
+ const status = await queueChanges(
+ projectId,
+ initialChangesToQueue,
+ versionBeforeInitialQueue
+ )
+ // Confirm that the initial changes were queued successfully
+ expect(status).to.equal('ok')
+ const versionAfterInitialQueue =
+ versionBeforeInitialQueue + initialChangesToQueue.length
+
+ // Compute the snapshot after the initial changes for later use
+ const initialSnapshot = new Snapshot()
+ await initialSnapshot.loadFiles('hollow', blobStore)
+ for (const change of initialChangesToQueue) {
+ const hollowChange = change.clone()
+ await hollowChange.loadFiles('hollow', blobStore)
+ hollowChange.applyTo(initialSnapshot, { strict: true })
+ }
+
+ // Now prepare some subsequent changes for the queue
+ await blobStore.putFile(testFiles.path('graph.png'))
+ const addFileOp = new AddFileOperation(
+ 'graph.png',
+ File.fromHash(testFiles.GRAPH_PNG_HASH)
+ )
+ const addFileChange = new Change([addFileOp], new Date(), [])
+ const editFileOp = new EditFileOperation(
+ 'existing.tex',
+ new TextOperation()
+ .insert('Hello')
+ .retain(testFiles.HELLO_TXT_UTF8_LENGTH)
+ )
+ const editFileChange = new Change([editFileOp], new Date(), [])
+
+ const subsequentChangesToQueue = [addFileChange, editFileChange]
+ const versionBeforeSubsequentQueue = versionAfterInitialQueue
+
+ // Queue the subsequent changes
+ const subsequentStatus = await queueChanges(
+ projectId,
+ subsequentChangesToQueue,
+ versionBeforeSubsequentQueue
+ )
+ expect(subsequentStatus).to.equal('ok')
+
+ // Compute the expected snapshot after applying all changes
+ const expectedSnapshot = initialSnapshot.clone()
+ await expectedSnapshot.loadFiles('hollow', blobStore)
+ for (const change of subsequentChangesToQueue) {
+ const hollowChange = change.clone()
+ await hollowChange.loadFiles('hollow', blobStore)
+ hollowChange.applyTo(expectedSnapshot, { strict: true })
+ }
+
+ // Confirm that state in redis matches the expected snapshot and changes queue
+ const finalRedisState = await redisBackend.getState(projectId)
+ expect(finalRedisState).to.not.be.null
+ const expectedFinalVersion =
+ versionBeforeSubsequentQueue + subsequentChangesToQueue.length
+ expect(finalRedisState.headVersion).to.equal(expectedFinalVersion)
+ expect(finalRedisState.headSnapshot).to.deep.equal(expectedSnapshot.toRaw())
+ const allQueuedChangesRaw = initialChangesToQueue
+ .concat(subsequentChangesToQueue)
+ .map(c => c.toRaw())
+ expect(finalRedisState.changes).to.deep.equal(allQueuedChangesRaw)
+ })
+
+ it('skips queuing changes when there is no snapshot and the onlyIfExists flag is set', async function () {
+ // Start with an empty chunk store for the project
+ const projectId = fixtures.docs.uninitializedProject.id
+ await chunkStore.initializeProject(projectId)
+
+ // Ensure that the initial state in redis is empty
+ const initialRedisState = await redisBackend.getState(projectId)
+ expect(initialRedisState.headVersion).to.be.null
+ expect(initialRedisState.headSnapshot).to.be.null
+ expect(initialRedisState.changes).to.be.an('array').that.is.empty
+
+ // Add a test file to the blob store
+ const blobStore = new BlobStore(projectId)
+ await blobStore.putFile(testFiles.path('hello.txt'))
+
+ // Prepare an initial change to add a single file to an empty project
+ const change = new Change(
+ [
+ new AddFileOperation(
+ 'test.tex',
+ File.fromHash(testFiles.HELLO_TXT_HASH)
+ ),
+ ],
+ new Date(),
+ []
+ )
+ const changesToQueue = [change]
+ const endVersion = 0
+
+ // Queue the changes to add the test file
+ const status = await queueChanges(projectId, changesToQueue, endVersion, {
+ onlyIfExists: true,
+ })
+ expect(status).to.equal('ignore')
+
+ // Verify that the state in redis has not changed
+ const redisState = await redisBackend.getState(projectId)
+ expect(redisState).to.deep.equal(initialRedisState)
+ })
+
+ it('creates an initial hollow snapshot when redis has no snapshot (falls back to chunkStore with an empty chunk)', async function () {
+ // Start with an empty chunk store for the project
+ const projectId = fixtures.docs.uninitializedProject.id
+ await chunkStore.initializeProject(projectId)
+ const blobStore = new BlobStore(projectId)
+ await blobStore.putFile(testFiles.path('hello.txt'))
+
+ // Prepare an initial change to add a single file to an empty project
+ const change = new Change(
+ [
+ new AddFileOperation(
+ 'test.tex',
+ File.fromHash(testFiles.HELLO_TXT_HASH)
+ ),
+ ],
+ new Date(),
+ []
+ )
+ const changesToQueue = [change]
+ const endVersion = 0
+
+ // Queue the changes to add the test file
+ const status = await queueChanges(projectId, changesToQueue, endVersion)
+ expect(status).to.equal('ok')
+
+ // Verify that we now have some state in redis
+ const redisState = await redisBackend.getState(projectId)
+ expect(redisState).to.not.be.null
+ expect(redisState.headSnapshot.files['test.tex']).to.deep.equal({
+ stringLength: testFiles.HELLO_TXT_UTF8_LENGTH,
+ })
+ })
+
+ it('throws ConflictingEndVersion if endVersion does not match current version (from chunkStore)', async function () {
+ const projectId = fixtures.docs.uninitializedProject.id
+ // Initialise an empty project in the chunk store
+ await chunkStore.initializeProject(projectId)
+
+ // Ensure that the initial state in redis is empty
+ const initialRedisState = await redisBackend.getState(projectId)
+ expect(initialRedisState.headVersion).to.be.null
+
+ // Prepare a change to add a file
+ const change = new Change(
+ [new AddFileOperation('test.tex', File.fromString(''))],
+ new Date(),
+ []
+ )
+ const changesToQueue = [change]
+ const incorrectEndVersion = 1
+
+ // Attempt to queue the changes with an incorrect endVersion (1 instead of 0)
+ await expect(queueChanges(projectId, changesToQueue, incorrectEndVersion))
+ .to.be.rejectedWith(Chunk.ConflictingEndVersion)
+ .and.eventually.satisfies(err => {
+ expect(err.info).to.have.property(
+ 'clientEndVersion',
+ incorrectEndVersion
+ )
+ expect(err.info).to.have.property('latestEndVersion', 0)
+ return true
+ })
+
+ // Verify that the state in redis has not changed
+ const redisStateAfterError = await redisBackend.getState(projectId)
+ expect(redisStateAfterError).to.deep.equal(initialRedisState)
+ })
+
+ it('throws ConflictingEndVersion if endVersion does not match current version (from redis snapshot)', async function () {
+ const projectId = fixtures.docs.uninitializedProject.id
+
+ // Initialise the project in redis with a test file
+ await chunkStore.initializeProject(projectId)
+ const initialChange = new Change(
+ [new AddFileOperation('initial.tex', File.fromString('content'))],
+ new Date(),
+ []
+ )
+ const initialChangesToQueue = [initialChange]
+ const versionBeforeInitialQueue = 0
+
+ // Queue the initial changes
+ await queueChanges(
+ projectId,
+ initialChangesToQueue,
+ versionBeforeInitialQueue
+ )
+ const versionInRedisAfterSetup =
+ versionBeforeInitialQueue + initialChangesToQueue.length
+
+ // Confirm that the initial changes were queued successfully
+ const initialRedisState = await redisBackend.getState(projectId)
+ expect(initialRedisState).to.not.be.null
+ expect(initialRedisState.headVersion).to.equal(versionInRedisAfterSetup)
+
+ // Now prepare a subsequent change for the queue
+ const subsequentChange = new Change(
+ [new AddFileOperation('another.tex', File.fromString(''))],
+ new Date(),
+ []
+ )
+ const subsequentChangesToQueue = [subsequentChange]
+ const incorrectEndVersion = 0
+
+ // Attempt to queue the changes with an incorrect endVersion (0 instead of 1)
+ await expect(
+ queueChanges(projectId, subsequentChangesToQueue, incorrectEndVersion)
+ )
+ .to.be.rejectedWith(Chunk.ConflictingEndVersion)
+ .and.eventually.satisfies(err => {
+ expect(err.info).to.have.property(
+ 'clientEndVersion',
+ incorrectEndVersion
+ )
+ expect(err.info).to.have.property(
+ 'latestEndVersion',
+ versionInRedisAfterSetup
+ )
+ return true
+ })
+
+ // Verify that the state in redis has not changed
+ const redisStateAfterError = await redisBackend.getState(projectId)
+ expect(redisStateAfterError).to.not.be.null
+ expect(redisStateAfterError).to.deep.equal(initialRedisState)
+ })
+})
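
A minimal usage sketch of the queueChanges API as exercised above, assuming the same relative require paths as this test file; the file name and content are illustrative:

const storage = require('../../../../storage')
const { Change, AddFileOperation, File } = require('overleaf-editor-core')

// Queue a single change; resolves to 'ok' when queued, 'ignore' when
// onlyIfExists is set and Redis holds no snapshot, and rejects with
// Chunk.ConflictingEndVersion when endVersion is stale.
async function queueDemoChange(projectId, endVersion) {
  const change = new Change(
    [new AddFileOperation('demo.tex', File.fromString('hello'))],
    new Date(),
    []
  )
  return await storage.queueChanges(projectId, [change], endVersion, {
    onlyIfExists: false,
  })
}
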
diff --git a/services/history-v1/test/acceptance/js/storage/support/MockFilestore.mjs b/services/history-v1/test/acceptance/js/storage/support/MockFilestore.mjs
new file mode 100644
index 0000000000..55d0923c34
--- /dev/null
+++ b/services/history-v1/test/acceptance/js/storage/support/MockFilestore.mjs
@@ -0,0 +1,54 @@
+import express from 'express'
+
+class MockFilestore {
+ constructor() {
+ this.host = process.env.FILESTORE_HOST || '127.0.0.1'
+ this.port = process.env.FILESTORE_PORT || 3009
+ // create a server listening on this.host and this.port
+ this.files = {}
+
+ this.app = express()
+
+ this.app.get('/project/:projectId/file/:fileId', (req, res) => {
+ const { projectId, fileId } = req.params
+ const content = this.files[projectId]?.[fileId]
+ if (!content) return res.status(404).end()
+ res.status(200).end(content)
+ })
+ }
+
+ start() {
+ // reset stored files
+ this.files = {}
+ // start the server
+ if (this.serverPromise) {
+ return this.serverPromise
+ } else {
+ this.serverPromise = new Promise((resolve, reject) => {
+ this.server = this.app.listen(this.port, this.host, err => {
+ if (err) return reject(err)
+ resolve()
+ })
+ })
+ return this.serverPromise
+ }
+ }
+
+ addFile(projectId, fileId, fileContent) {
+ if (!this.files[projectId]) {
+ this.files[projectId] = {}
+ }
+ this.files[projectId][fileId] = fileContent
+ }
+
+ deleteObject(projectId, fileId) {
+ if (this.files[projectId]) {
+ delete this.files[projectId][fileId]
+ if (Object.keys(this.files[projectId]).length === 0) {
+ delete this.files[projectId]
+ }
+ }
+ }
+}
+
+export const mockFilestore = new MockFilestore()
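
A short usage sketch for the mock filestore above; the URL shape comes from the route registered in the constructor, and the project/file ids are illustrative:

import { mockFilestore } from './MockFilestore.mjs'

await mockFilestore.start()
mockFilestore.addFile('507f1f77bcf86cd799439011', 'file-1', 'file contents')

// The history service (or a test) can now fetch the file over HTTP.
const res = await fetch(
  `http://${mockFilestore.host}:${mockFilestore.port}/project/507f1f77bcf86cd799439011/file/file-1`
)
console.log(res.status, await res.text()) // 200 'file contents'
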
diff --git a/services/history-v1/test/acceptance/js/storage/support/cleanup.js b/services/history-v1/test/acceptance/js/storage/support/cleanup.js
index b237d4fc17..4df985d613 100644
--- a/services/history-v1/test/acceptance/js/storage/support/cleanup.js
+++ b/services/history-v1/test/acceptance/js/storage/support/cleanup.js
@@ -1,6 +1,7 @@
const config = require('config')
-const { knex, persistor, mongodb } = require('../../../../../storage')
+const { knex, persistor, mongodb, redis } = require('../../../../../storage')
+const { S3Persistor } = require('@overleaf/object-persistor/src/S3Persistor')
const POSTGRES_TABLES = [
'chunks',
@@ -16,7 +17,6 @@ const MONGO_COLLECTIONS = [
'projectHistoryChunks',
// back_fill_file_hash.test.mjs
- 'deletedFiles',
'deletedProjects',
'projects',
'projectHistoryBackedUpBlobs',
@@ -42,6 +42,11 @@ async function cleanupMongo() {
}
}
+async function cleanupRedis() {
+ await redis.rclientHistory.flushdb()
+ await redis.rclientLock.flushdb()
+}
+
async function cleanupPersistor() {
await Promise.all([
clearBucket(config.get('blobStore.globalBucket')),
@@ -55,16 +60,45 @@ async function clearBucket(name) {
await persistor.deleteDirectory(name, '')
}
+let s3PersistorForBackupCleanup
+
+async function cleanupBackup() {
+ if (!config.has('backupStore')) {
+ return
+ }
+
+ // The backupPersistor refuses to delete short prefixes. Use a low-level S3 persistor.
+ if (!s3PersistorForBackupCleanup) {
+ const { backupPersistor } = await import(
+ '../../../../../storage/lib/backupPersistor.mjs'
+ )
+ s3PersistorForBackupCleanup = new S3Persistor(backupPersistor.settings)
+ }
+ await Promise.all(
+ Object.values(config.get('backupStore')).map(name =>
+ s3PersistorForBackupCleanup.deleteDirectory(name, '')
+ )
+ )
+}
+
async function cleanupEverything() {
// Set the timeout when called in a Mocha test. This function is also called
// in benchmarks where it is not passed a Mocha context.
this.timeout?.(5000)
- await Promise.all([cleanupPostgres(), cleanupMongo(), cleanupPersistor()])
+ await Promise.all([
+ cleanupPostgres(),
+ cleanupMongo(),
+ cleanupPersistor(),
+ cleanupBackup(),
+ cleanupRedis(),
+ ])
}
module.exports = {
postgres: cleanupPostgres,
mongo: cleanupMongo,
persistor: cleanupPersistor,
+ backup: cleanupBackup,
+ redis: cleanupRedis,
everything: cleanupEverything,
}
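
With the additions above, suites can opt into the new cleanup steps individually or all at once. A small sketch of the pattern used by the acceptance tests in this diff (requires the mocha globals available in the test environment):

const cleanup = require('./support/cleanup')

describe('some storage feature', function () {
  // cleanup.everything now also flushes both Redis clients and, when a
  // backupStore is configured, the backup buckets.
  beforeEach(cleanup.everything)
  // Narrower hooks remain available, e.g.:
  // beforeEach(cleanup.redis)
})
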
diff --git a/services/history-v1/test/acceptance/js/storage/support/redis.js b/services/history-v1/test/acceptance/js/storage/support/redis.js
new file mode 100644
index 0000000000..3f5b9cda27
--- /dev/null
+++ b/services/history-v1/test/acceptance/js/storage/support/redis.js
@@ -0,0 +1,75 @@
+'use strict'
+
+const { Snapshot } = require('overleaf-editor-core')
+const redis = require('../../../../../storage/lib/redis')
+const redisBackend = require('../../../../../storage/lib/chunk_store/redis')
+const rclient = redis.rclientHistory
+const keySchema = redisBackend.keySchema
+
+// Helper to set up a basic project state in Redis
+async function setupProjectState(
+ projectId,
+ {
+ headVersion = 0,
+ persistedVersion = null,
+ expireTime = null,
+ persistTime = null,
+ changes = [],
+ expireTimeFuture = false, // Default to not setting future expire time unless specified
+ }
+) {
+ const headSnapshot = new Snapshot()
+ await rclient.set(
+ keySchema.head({ projectId }),
+ JSON.stringify(headSnapshot.toRaw())
+ )
+ await rclient.set(
+ keySchema.headVersion({ projectId }),
+ headVersion.toString()
+ )
+
+ if (persistedVersion !== null) {
+ await rclient.set(
+ keySchema.persistedVersion({ projectId }),
+ persistedVersion.toString()
+ )
+ } else {
+ await rclient.del(keySchema.persistedVersion({ projectId }))
+ }
+
+ if (expireTime !== null) {
+ await rclient.set(
+ keySchema.expireTime({ projectId }),
+ expireTime.toString()
+ )
+ } else {
+ // expireTime was not provided: set a future expire time if requested, otherwise delete the key
+ if (expireTimeFuture) {
+ const futureExpireTime = Date.now() + 5 * 60 * 1000 // 5 minutes in the future
+ await rclient.set(
+ keySchema.expireTime({ projectId }),
+ futureExpireTime.toString()
+ )
+ } else {
+ await rclient.del(keySchema.expireTime({ projectId }))
+ }
+ }
+
+ if (persistTime !== null) {
+ await rclient.set(
+ keySchema.persistTime({ projectId }),
+ persistTime.toString()
+ )
+ } else {
+ await rclient.del(keySchema.persistTime({ projectId }))
+ }
+
+ if (changes.length > 0) {
+ const rawChanges = changes.map(c => JSON.stringify(c.toRaw()))
+ await rclient.rpush(keySchema.changes({ projectId }), ...rawChanges)
+ } else {
+ await rclient.del(keySchema.changes({ projectId }))
+ }
+}
+
+module.exports = { setupProjectState, rclient, keySchema }
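
A compact example of how setupProjectState is used by the acceptance tests above; the values mirror the "due for persistence" scenario, and projectId/change are whatever the caller created beforehand:

const { setupProjectState } = require('./support/redis')

// Seed Redis so that `projectId` looks overdue for persistence: head is at
// version 2 but only version 1 has been persisted (`change` is a Change instance).
async function seedOverdueProject(projectId, change) {
  await setupProjectState(projectId, {
    persistTime: Date.now() - 10_000, // 10 seconds in the past
    headVersion: 2,
    persistedVersion: 1,
    changes: [change], // serialized with toRaw() by the helper
    expireTimeFuture: true,
  })
}
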
diff --git a/services/history-v1/test/acceptance/js/storage/support/runscript.js b/services/history-v1/test/acceptance/js/storage/support/runscript.js
new file mode 100644
index 0000000000..7ff8355566
--- /dev/null
+++ b/services/history-v1/test/acceptance/js/storage/support/runscript.js
@@ -0,0 +1,35 @@
+'use strict'
+
+const { promisify } = require('node:util')
+const { execFile } = require('node:child_process')
+
+async function runScript(scriptPath, options = {}) {
+ const TIMEOUT = options.timeout || 10 * 1000 // 10 seconds default
+ let result
+ try {
+ result = await promisify(execFile)('node', [scriptPath], {
+ encoding: 'utf-8',
+ timeout: TIMEOUT,
+ env: {
+ ...process.env,
+ LOG_LEVEL: 'debug', // Override LOG_LEVEL for script output
+ },
+ })
+ result.status = 0
+ } catch (err) {
+ const { stdout, stderr, code } = err
+ if (typeof code !== 'number') {
+ console.error(`Error running script ${scriptPath}:`, err)
+ throw err
+ }
+ result = { stdout, stderr, status: code }
+ }
+ // The script might exit with status 1 if it finds no keys to process, which is ok
+ if (result.status !== 0 && result.status !== 1) {
+ console.error(`Script ${scriptPath} failed:`, result.stderr)
+ throw new Error(`Script ${scriptPath} failed with status ${result.status}`)
+ }
+ return result
+}
+
+module.exports = { runScript }
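
A usage sketch for the runScript helper above; the longer timeout is illustrative, and exit status 1 is tolerated by the helper (no keys to process):

const { runScript } = require('./support/runscript')

// Run a maintenance script with a 30s timeout instead of the 10s default.
async function persistNow() {
  const result = await runScript('storage/scripts/persist_redis_chunks.mjs', {
    timeout: 30 * 1000,
  })
  console.log(result.status, result.stdout)
}
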
diff --git a/services/history-v1/test/acceptance/js/storage/tasks.test.js b/services/history-v1/test/acceptance/js/storage/tasks.test.js
index 04f9cd12c3..e43bdac79f 100644
--- a/services/history-v1/test/acceptance/js/storage/tasks.test.js
+++ b/services/history-v1/test/acceptance/js/storage/tasks.test.js
@@ -76,9 +76,13 @@ describe('tasks', function () {
await mongodb.chunks.insertMany(mongoChunks)
await Promise.all([
...postgresChunks.map(chunk =>
- historyStore.storeRaw(postgresProjectId.toString(), chunk.chunk_id, {
- history: 'raw history',
- })
+ historyStore.storeRaw(
+ postgresProjectId.toString(),
+ chunk.chunk_id.toString(),
+ {
+ history: 'raw history',
+ }
+ )
),
...mongoChunks.map(chunk =>
historyStore.storeRaw(mongoProjectId.toString(), chunk._id.toString(), {
diff --git a/services/history-v1/test/acceptance/pg-init/set-up-readOnly-user.sql b/services/history-v1/test/acceptance/pg-init/set-up-readOnly-user.sql
new file mode 100644
index 0000000000..3a1abf3b60
--- /dev/null
+++ b/services/history-v1/test/acceptance/pg-init/set-up-readOnly-user.sql
@@ -0,0 +1,2 @@
+CREATE USER read_only PASSWORD 'password';
+ALTER DEFAULT PRIVILEGES FOR USER overleaf IN SCHEMA public GRANT SELECT ON TABLES TO read_only;
diff --git a/services/history-v1/test/setup.js b/services/history-v1/test/setup.js
index 20f891ceb6..60974173de 100644
--- a/services/history-v1/test/setup.js
+++ b/services/history-v1/test/setup.js
@@ -2,23 +2,26 @@ const chai = require('chai')
const chaiAsPromised = require('chai-as-promised')
const config = require('config')
const fetch = require('node-fetch')
-const { knex, mongodb } = require('../storage')
+const { knex, mongodb, redis } = require('../storage')
// ensure every ObjectId has the id string as a property for correct comparisons
require('mongodb').ObjectId.cacheHexString = true
chai.use(chaiAsPromised)
+chai.config.truncateThreshold = 0
async function setupPostgresDatabase() {
+ this.timeout(60_000)
await knex.migrate.latest()
}
async function setupMongoDatabase() {
+ this.timeout(60_000)
await mongodb.db.collection('projectHistoryChunks').createIndexes([
{
key: { projectId: 1, startVersion: 1 },
name: 'projectId_1_startVersion_1',
- partialFilterExpression: { state: 'active' },
+ partialFilterExpression: { state: { $in: ['active', 'closed'] } },
unique: true,
},
{
@@ -30,6 +33,7 @@ async function setupMongoDatabase() {
}
async function createGcsBuckets() {
+ this.timeout(60_000)
for (const bucket of [
config.get('blobStore.globalBucket'),
config.get('blobStore.projectBucket'),
@@ -49,6 +53,7 @@ async function createGcsBuckets() {
// can exit.
async function tearDownConnectionPool() {
await knex.destroy()
+ await redis.disconnect()
}
module.exports = {
diff --git a/services/history-v1/tsconfig.json b/services/history-v1/tsconfig.json
index 0688609f41..0e20309d3d 100644
--- a/services/history-v1/tsconfig.json
+++ b/services/history-v1/tsconfig.json
@@ -6,6 +6,7 @@
"app/js/**/*",
"backup-deletion-app.mjs",
"backup-verifier-app.mjs",
+ "backup-worker-app.mjs",
"benchmarks/**/*",
"config/**/*",
"migrations/**/*",
diff --git a/services/notifications/.gitignore b/services/notifications/.gitignore
deleted file mode 100644
index 8a030e9aff..0000000000
--- a/services/notifications/.gitignore
+++ /dev/null
@@ -1,54 +0,0 @@
-Compiled source #
-###################
-*.com
-*.class
-*.dll
-*.exe
-*.o
-*.so
-
-# Packages #
-############
-# it's better to unpack these files and commit the raw source
-# git has its own built in compression methods
-*.7z
-*.dmg
-*.gz
-*.iso
-*.jar
-*.rar
-*.tar
-*.zip
-
-# Logs and databases #
-######################
-*.log
-*.sql
-*.sqlite
-
-# OS generated files #
-######################
-.DS_Store?
-ehthumbs.db
-Icon?
-Thumbs.db
-
-node_modules/*
-data/*
-
-cookies.txt
-UserAndProjectPopulator.coffee
-
-public/stylesheets/style.css
-
-Gemfile.lock
-
-*.swp
-.DS_Store
-
-app/views/external
-
-/modules/
-
-# managed by dev-environment$ bin/update_build_scripts
-.npmrc
diff --git a/services/notifications/.nvmrc b/services/notifications/.nvmrc
index 2a393af592..fc37597bcc 100644
--- a/services/notifications/.nvmrc
+++ b/services/notifications/.nvmrc
@@ -1 +1 @@
-20.18.0
+22.17.0
diff --git a/services/notifications/Dockerfile b/services/notifications/Dockerfile
index c4d13cc422..0d0581f113 100644
--- a/services/notifications/Dockerfile
+++ b/services/notifications/Dockerfile
@@ -2,7 +2,7 @@
# Instead run bin/update_build_scripts from
# https://github.com/overleaf/internal/
-FROM node:20.18.0 AS base
+FROM node:22.17.0 AS base
WORKDIR /overleaf/services/notifications
diff --git a/services/notifications/Makefile b/services/notifications/Makefile
index 192452a54a..f8440d97e2 100644
--- a/services/notifications/Makefile
+++ b/services/notifications/Makefile
@@ -32,12 +32,30 @@ HERE=$(shell pwd)
MONOREPO=$(shell cd ../../ && pwd)
# Run the linting commands in the scope of the monorepo.
# Eslint and prettier (plus some configs) are on the root.
-RUN_LINTING = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(HERE) node:20.18.0 npm run --silent
+RUN_LINTING = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(HERE) node:22.17.0 npm run --silent
RUN_LINTING_CI = docker run --rm --volume $(MONOREPO)/.editorconfig:/overleaf/.editorconfig --volume $(MONOREPO)/.eslintignore:/overleaf/.eslintignore --volume $(MONOREPO)/.eslintrc:/overleaf/.eslintrc --volume $(MONOREPO)/.prettierignore:/overleaf/.prettierignore --volume $(MONOREPO)/.prettierrc:/overleaf/.prettierrc --volume $(MONOREPO)/tsconfig.backend.json:/overleaf/tsconfig.backend.json ci/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER) npm run --silent
# Same but from the top of the monorepo
-RUN_LINTING_MONOREPO = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(MONOREPO) node:20.18.0 npm run --silent
+RUN_LINTING_MONOREPO = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(MONOREPO) node:22.17.0 npm run --silent
+
+SHELLCHECK_OPTS = \
+ --shell=bash \
+ --external-sources
+SHELLCHECK_COLOR := $(if $(CI),--color=never,--color)
+SHELLCHECK_FILES := { git ls-files "*.sh" -z; git grep -Plz "\A\#\!.*bash"; } | sort -zu
+
+shellcheck:
+ @$(SHELLCHECK_FILES) | xargs -0 -r docker run --rm -v $(HERE):/mnt -w /mnt \
+ koalaman/shellcheck:stable $(SHELLCHECK_OPTS) $(SHELLCHECK_COLOR)
+
+shellcheck_fix:
+ @$(SHELLCHECK_FILES) | while IFS= read -r -d '' file; do \
+ diff=$$(docker run --rm -v $(HERE):/mnt -w /mnt koalaman/shellcheck:stable $(SHELLCHECK_OPTS) --format=diff "$$file" 2>/dev/null); \
+ if [ -n "$$diff" ] && ! echo "$$diff" | patch -p1 >/dev/null 2>&1; then echo "\033[31m$$file\033[0m"; \
+ elif [ -n "$$diff" ]; then echo "$$file"; \
+ else echo "\033[2m$$file\033[0m"; fi \
+ done
format:
$(RUN_LINTING) format
@@ -63,7 +81,7 @@ typecheck:
typecheck_ci:
$(RUN_LINTING_CI) types:check
-test: format lint typecheck test_unit test_acceptance
+test: format lint typecheck shellcheck test_unit test_acceptance
test_unit:
ifneq (,$(wildcard test/unit))
@@ -98,13 +116,6 @@ test_acceptance_clean:
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) down -v -t 0
test_acceptance_pre_run:
- $(DOCKER_COMPOSE_TEST_ACCEPTANCE) up -d mongo
- $(DOCKER_COMPOSE_TEST_ACCEPTANCE) exec -T mongo sh -c ' \
- while ! mongosh --eval "db.version()" > /dev/null; do \
- echo "Waiting for Mongo..."; \
- sleep 1; \
- done; \
- mongosh --eval "rs.initiate({ _id: \"overleaf\", members: [ { _id: 0, host: \"mongo:27017\" } ] })"'
ifneq (,$(wildcard test/acceptance/js/scripts/pre-run))
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) run --rm test_acceptance test/acceptance/js/scripts/pre-run
endif
@@ -137,6 +148,7 @@ publish:
lint lint_fix \
build_types typecheck \
lint_ci format_ci typecheck_ci \
+ shellcheck shellcheck_fix \
test test_clean test_unit test_unit_clean \
test_acceptance test_acceptance_debug test_acceptance_pre_run \
test_acceptance_run test_acceptance_run_debug test_acceptance_clean \
diff --git a/services/notifications/buildscript.txt b/services/notifications/buildscript.txt
index 657375f9aa..1950c5c251 100644
--- a/services/notifications/buildscript.txt
+++ b/services/notifications/buildscript.txt
@@ -4,6 +4,6 @@ notifications
--env-add=
--env-pass-through=
--esmock-loader=False
---node-version=20.18.0
+--node-version=22.17.0
--public-repo=True
---script-version=4.5.0
+--script-version=4.7.0
diff --git a/services/notifications/docker-compose.ci.yml b/services/notifications/docker-compose.ci.yml
index 6f1a608534..ca3303a079 100644
--- a/services/notifications/docker-compose.ci.yml
+++ b/services/notifications/docker-compose.ci.yml
@@ -24,10 +24,13 @@ services:
MOCHA_GREP: ${MOCHA_GREP}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
+ volumes:
+ - ../../bin/shared/wait_for_it:/overleaf/bin/shared/wait_for_it
depends_on:
mongo:
- condition: service_healthy
+ condition: service_started
user: node
+ entrypoint: /overleaf/bin/shared/wait_for_it mongo:27017 --timeout=0 --
command: npm run test:acceptance
@@ -39,9 +42,14 @@ services:
command: tar -czf /tmp/build/build.tar.gz --exclude=build.tar.gz --exclude-vcs .
user: root
mongo:
- image: mongo:6.0.13
+ image: mongo:8.0.11
command: --replSet overleaf
- healthcheck:
- test: "mongosh --quiet localhost/test --eval 'quit(db.runCommand({ ping: 1 }).ok ? 0 : 1)'"
- interval: 1s
- retries: 20
+ volumes:
+ - ../../bin/shared/mongodb-init-replica-set.js:/docker-entrypoint-initdb.d/mongodb-init-replica-set.js
+ environment:
+ MONGO_INITDB_DATABASE: sharelatex
+ extra_hosts:
+ # Required when using the automatic database setup for initializing the
+ # replica set. This override is not needed when running the setup after
+ # starting up mongo.
+ - mongo:127.0.0.1
diff --git a/services/notifications/docker-compose.yml b/services/notifications/docker-compose.yml
index 73483e4371..e43e9aeef5 100644
--- a/services/notifications/docker-compose.yml
+++ b/services/notifications/docker-compose.yml
@@ -6,7 +6,7 @@ version: "2.3"
services:
test_unit:
- image: node:20.18.0
+ image: node:22.17.0
volumes:
- .:/overleaf/services/notifications
- ../../node_modules:/overleaf/node_modules
@@ -14,37 +14,45 @@ services:
working_dir: /overleaf/services/notifications
environment:
MOCHA_GREP: ${MOCHA_GREP}
+ LOG_LEVEL: ${LOG_LEVEL:-}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
command: npm run --silent test:unit
user: node
test_acceptance:
- image: node:20.18.0
+ image: node:22.17.0
volumes:
- .:/overleaf/services/notifications
- ../../node_modules:/overleaf/node_modules
- ../../libraries:/overleaf/libraries
+ - ../../bin/shared/wait_for_it:/overleaf/bin/shared/wait_for_it
working_dir: /overleaf/services/notifications
environment:
ELASTIC_SEARCH_DSN: es:9200
MONGO_HOST: mongo
POSTGRES_HOST: postgres
MOCHA_GREP: ${MOCHA_GREP}
- LOG_LEVEL: ERROR
+ LOG_LEVEL: ${LOG_LEVEL:-}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
user: node
depends_on:
mongo:
- condition: service_healthy
+ condition: service_started
+ entrypoint: /overleaf/bin/shared/wait_for_it mongo:27017 --timeout=0 --
command: npm run --silent test:acceptance
mongo:
- image: mongo:6.0.13
+ image: mongo:8.0.11
command: --replSet overleaf
- healthcheck:
- test: "mongosh --quiet localhost/test --eval 'quit(db.runCommand({ ping: 1 }).ok ? 0 : 1)'"
- interval: 1s
- retries: 20
+ volumes:
+ - ../../bin/shared/mongodb-init-replica-set.js:/docker-entrypoint-initdb.d/mongodb-init-replica-set.js
+ environment:
+ MONGO_INITDB_DATABASE: sharelatex
+ extra_hosts:
+ # Required when using the automatic database setup for initializing the
+ # replica set. This override is not needed when running the setup after
+ # starting up mongo.
+ - mongo:127.0.0.1
diff --git a/services/notifications/package.json b/services/notifications/package.json
index 80e61b7269..a591e897c9 100644
--- a/services/notifications/package.json
+++ b/services/notifications/package.json
@@ -25,7 +25,7 @@
"async": "^3.2.5",
"body-parser": "^1.20.3",
"bunyan": "^1.8.15",
- "express": "^4.21.0",
+ "express": "^4.21.2",
"method-override": "^3.0.0",
"mongodb-legacy": "6.1.3",
"request": "^2.88.2"
@@ -33,7 +33,7 @@
"devDependencies": {
"chai": "^4.3.6",
"chai-as-promised": "^7.1.1",
- "mocha": "^10.2.0",
+ "mocha": "^11.1.0",
"sandboxed-module": "^2.0.4",
"sinon": "^9.2.4",
"typescript": "^5.0.4"
diff --git a/services/project-history/.gitignore b/services/project-history/.gitignore
deleted file mode 100644
index 25328fed2e..0000000000
--- a/services/project-history/.gitignore
+++ /dev/null
@@ -1,8 +0,0 @@
-**.swp
-node_modules/
-forever/
-.config
-.npm
-
-# managed by dev-environment$ bin/update_build_scripts
-.npmrc
diff --git a/services/project-history/.nvmrc b/services/project-history/.nvmrc
index 2a393af592..fc37597bcc 100644
--- a/services/project-history/.nvmrc
+++ b/services/project-history/.nvmrc
@@ -1 +1 @@
-20.18.0
+22.17.0
diff --git a/services/project-history/Dockerfile b/services/project-history/Dockerfile
index c51bb56195..0d719cbfc6 100644
--- a/services/project-history/Dockerfile
+++ b/services/project-history/Dockerfile
@@ -2,7 +2,7 @@
# Instead run bin/update_build_scripts from
# https://github.com/overleaf/internal/
-FROM node:20.18.0 AS base
+FROM node:22.17.0 AS base
WORKDIR /overleaf/services/project-history
diff --git a/services/project-history/Makefile b/services/project-history/Makefile
index bda77e1d2a..a1d116253d 100644
--- a/services/project-history/Makefile
+++ b/services/project-history/Makefile
@@ -32,12 +32,30 @@ HERE=$(shell pwd)
MONOREPO=$(shell cd ../../ && pwd)
# Run the linting commands in the scope of the monorepo.
# Eslint and prettier (plus some configs) are on the root.
-RUN_LINTING = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(HERE) node:20.18.0 npm run --silent
+RUN_LINTING = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(HERE) node:22.17.0 npm run --silent
RUN_LINTING_CI = docker run --rm --volume $(MONOREPO)/.editorconfig:/overleaf/.editorconfig --volume $(MONOREPO)/.eslintignore:/overleaf/.eslintignore --volume $(MONOREPO)/.eslintrc:/overleaf/.eslintrc --volume $(MONOREPO)/.prettierignore:/overleaf/.prettierignore --volume $(MONOREPO)/.prettierrc:/overleaf/.prettierrc --volume $(MONOREPO)/tsconfig.backend.json:/overleaf/tsconfig.backend.json --volume $(MONOREPO)/services/document-updater/app/js/types.ts:/overleaf/services/document-updater/app/js/types.ts ci/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER) npm run --silent
# Same but from the top of the monorepo
-RUN_LINTING_MONOREPO = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(MONOREPO) node:20.18.0 npm run --silent
+RUN_LINTING_MONOREPO = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(MONOREPO) node:22.17.0 npm run --silent
+
+SHELLCHECK_OPTS = \
+ --shell=bash \
+ --external-sources
+SHELLCHECK_COLOR := $(if $(CI),--color=never,--color)
+SHELLCHECK_FILES := { git ls-files "*.sh" -z; git grep -Plz "\A\#\!.*bash"; } | sort -zu
+
+shellcheck:
+ @$(SHELLCHECK_FILES) | xargs -0 -r docker run --rm -v $(HERE):/mnt -w /mnt \
+ koalaman/shellcheck:stable $(SHELLCHECK_OPTS) $(SHELLCHECK_COLOR)
+
+shellcheck_fix:
+ @$(SHELLCHECK_FILES) | while IFS= read -r -d '' file; do \
+ diff=$$(docker run --rm -v $(HERE):/mnt -w /mnt koalaman/shellcheck:stable $(SHELLCHECK_OPTS) --format=diff "$$file" 2>/dev/null); \
+ if [ -n "$$diff" ] && ! echo "$$diff" | patch -p1 >/dev/null 2>&1; then echo "\033[31m$$file\033[0m"; \
+ elif [ -n "$$diff" ]; then echo "$$file"; \
+ else echo "\033[2m$$file\033[0m"; fi \
+ done
format:
$(RUN_LINTING) format
@@ -63,7 +81,7 @@ typecheck:
typecheck_ci:
$(RUN_LINTING_CI) types:check
-test: format lint typecheck test_unit test_acceptance
+test: format lint typecheck shellcheck test_unit test_acceptance
test_unit:
ifneq (,$(wildcard test/unit))
@@ -98,13 +116,6 @@ test_acceptance_clean:
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) down -v -t 0
test_acceptance_pre_run:
- $(DOCKER_COMPOSE_TEST_ACCEPTANCE) up -d mongo
- $(DOCKER_COMPOSE_TEST_ACCEPTANCE) exec -T mongo sh -c ' \
- while ! mongosh --eval "db.version()" > /dev/null; do \
- echo "Waiting for Mongo..."; \
- sleep 1; \
- done; \
- mongosh --eval "rs.initiate({ _id: \"overleaf\", members: [ { _id: 0, host: \"mongo:27017\" } ] })"'
ifneq (,$(wildcard test/acceptance/js/scripts/pre-run))
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) run --rm test_acceptance test/acceptance/js/scripts/pre-run
endif
@@ -137,6 +148,7 @@ publish:
lint lint_fix \
build_types typecheck \
lint_ci format_ci typecheck_ci \
+ shellcheck shellcheck_fix \
test test_clean test_unit test_unit_clean \
test_acceptance test_acceptance_debug test_acceptance_pre_run \
test_acceptance_run test_acceptance_run_debug test_acceptance_clean \
diff --git a/services/project-history/app.js b/services/project-history/app.js
index 1c2f93bfbc..a72af4f1b2 100644
--- a/services/project-history/app.js
+++ b/services/project-history/app.js
@@ -14,10 +14,11 @@ mongoClient
.connect()
.then(() => {
app.listen(port, host, error => {
- if (error != null) {
- logger.error(OError.tag(error, 'could not start history server'))
+ if (error) {
+ error = OError.tag(error, 'could not start history server')
+ logger.error({ error }, error.message)
} else {
- logger.debug(`history starting up, listening on ${host}:${port}`)
+ logger.debug({}, `history starting up, listening on ${host}:${port}`)
}
})
})
diff --git a/services/project-history/app/js/BlobManager.js b/services/project-history/app/js/BlobManager.js
index a6ca8417ca..8f62204ee0 100644
--- a/services/project-history/app/js/BlobManager.js
+++ b/services/project-history/app/js/BlobManager.js
@@ -1,3 +1,4 @@
+import _ from 'lodash'
import async from 'async'
import logger from '@overleaf/logger'
import OError from '@overleaf/o-error'
@@ -37,6 +38,7 @@ export function createBlobsForUpdates(
let attempts = 0
// Since we may be creating O(1000) blobs in an update, allow for the
// occasional failure to prevent the whole update failing.
+ let lastErr
async.retry(
{
times: RETRY_ATTEMPTS,
@@ -46,14 +48,28 @@ export function createBlobsForUpdates(
attempts++
if (attempts > 1) {
logger.error(
- { projectId, doc: update.doc, file: update.file, attempts },
+ {
+ err: lastErr,
+ projectId,
+ historyId,
+ update: _.pick(
+ update,
+ 'doc',
+ 'file',
+ 'hash',
+ 'createdBlob',
+ 'url'
+ ),
+ attempts,
+ },
'previous createBlob attempt failed, retrying'
)
}
// extend the lock for each file because large files may take a long time
extendLock(err => {
if (err) {
- return _cb(OError.tag(err))
+ lastErr = OError.tag(err)
+ return _cb(lastErr)
}
HistoryStoreManager.createBlobForUpdate(
projectId,
@@ -61,12 +77,12 @@ export function createBlobsForUpdates(
update,
(err, hashes) => {
if (err) {
- OError.tag(err, 'retry: error creating blob', {
+ lastErr = OError.tag(err, 'retry: error creating blob', {
projectId,
doc: update.doc,
file: update.file,
})
- _cb(err)
+ _cb(lastErr)
} else {
_cb(null, hashes)
}
diff --git a/services/project-history/app/js/ErrorRecorder.js b/services/project-history/app/js/ErrorRecorder.js
index 3c0570f822..648b53f569 100644
--- a/services/project-history/app/js/ErrorRecorder.js
+++ b/services/project-history/app/js/ErrorRecorder.js
@@ -1,54 +1,57 @@
+// @ts-check
+
import { callbackify } from 'node:util'
import logger from '@overleaf/logger'
import metrics from '@overleaf/metrics'
+import OError from '@overleaf/o-error'
import { db } from './mongodb.js'
+/**
+ * @import { ProjectHistoryFailure } from './mongo-types'
+ */
+
+/**
+ * @param {string} projectId
+ * @param {number} queueSize
+ * @param {Error} error
+ * @return {Promise} the failure record
+ */
async function record(projectId, queueSize, error) {
- if (error != null) {
- const errorRecord = {
- queueSize,
- error: error.toString(),
- stack: error.stack,
- ts: new Date(),
- }
- logger.debug(
- { projectId, errorRecord },
- 'recording failed attempt to process updates'
- )
- try {
- await db.projectHistoryFailures.updateOne(
- { project_id: projectId },
- {
- $set: errorRecord,
- $inc: { attempts: 1 },
- $push: {
- history: {
- $each: [errorRecord],
- $position: 0,
- $slice: 10,
- },
- }, // only keep recent failures
- },
- { upsert: true }
- )
- } catch (mongoError) {
- logger.error(
- { projectId, mongoError },
- 'failed to change project statues in mongo'
- )
- }
- throw error
- } else {
- try {
- await db.projectHistoryFailures.deleteOne({ project_id: projectId })
- } catch (mongoError) {
- logger.error(
- { projectId, mongoError },
- 'failed to change project statues in mongo'
- )
- }
- return queueSize
+ const errorRecord = {
+ queueSize,
+ error: error.toString(),
+ stack: error.stack ?? '',
+ ts: new Date(),
}
+ logger.debug(
+ { projectId, errorRecord },
+ 'recording failed attempt to process updates'
+ )
+ const result = await db.projectHistoryFailures.findOneAndUpdate(
+ { project_id: projectId },
+ {
+ $set: errorRecord,
+ $inc: { attempts: 1 },
+ $push: {
+ history: {
+ $each: [errorRecord],
+ $position: 0,
+ // only keep recent failures
+ $slice: 10,
+ },
+ },
+ },
+ { upsert: true, returnDocument: 'after', includeResultMetadata: true }
+ )
+ if (result.value == null) {
+ // Since we upsert, the result should always have a value
+ throw new OError('no value returned when recording an error', { projectId })
+ }
+ return result.value
+}
+
+async function clearError(projectId) {
+ await db.projectHistoryFailures.deleteOne({ project_id: projectId })
}
async function setForceDebug(projectId, state) {
@@ -83,6 +86,9 @@ async function recordSyncStart(projectId) {
)
}
+/**
+ * @param projectId
+ */
async function getFailureRecord(projectId) {
return await db.projectHistoryFailures.findOne({ project_id: projectId })
}
@@ -234,6 +240,7 @@ const getFailureRecordCb = callbackify(getFailureRecord)
const getFailuresCb = callbackify(getFailures)
const getLastFailureCb = callbackify(getLastFailure)
const recordCb = callbackify(record)
+const clearErrorCb = callbackify(clearError)
const recordSyncStartCb = callbackify(recordSyncStart)
const setForceDebugCb = callbackify(setForceDebug)
@@ -243,6 +250,7 @@ export {
getLastFailureCb as getLastFailure,
getFailuresCb as getFailures,
recordCb as record,
+ clearErrorCb as clearError,
recordSyncStartCb as recordSyncStart,
setForceDebugCb as setForceDebug,
}
@@ -253,6 +261,7 @@ export const promises = {
getLastFailure,
getFailures,
record,
+ clearError,
recordSyncStart,
setForceDebug,
}
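
A hedged sketch of how the split between record() and clearError() above can be consumed; processAndTrack and processFn are illustrative names, not the real UpdatesProcessor code:

import * as ErrorRecorder from './ErrorRecorder.js'

// record() now always stores a failure and returns the updated failure
// document, while clearError() removes the record once processing succeeds.
async function processAndTrack(projectId, queueSize, processFn) {
  try {
    await processFn()
    await ErrorRecorder.promises.clearError(projectId)
  } catch (err) {
    const failure = await ErrorRecorder.promises.record(projectId, queueSize, err)
    console.log('attempts so far:', failure.attempts)
    throw err
  }
}
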
diff --git a/services/project-history/app/js/Errors.js b/services/project-history/app/js/Errors.js
index 54aab952a9..0b8d24b0d2 100644
--- a/services/project-history/app/js/Errors.js
+++ b/services/project-history/app/js/Errors.js
@@ -8,3 +8,4 @@ export class InconsistentChunkError extends OError {}
export class UpdateWithUnknownFormatError extends OError {}
export class UnexpectedOpTypeError extends OError {}
export class TooManyRequestsError extends OError {}
+export class NeedFullProjectStructureResyncError extends OError {}
diff --git a/services/project-history/app/js/FlushManager.js b/services/project-history/app/js/FlushManager.js
index 8c1870bcb5..455a4f56f7 100644
--- a/services/project-history/app/js/FlushManager.js
+++ b/services/project-history/app/js/FlushManager.js
@@ -11,6 +11,7 @@ import async from 'async'
import logger from '@overleaf/logger'
import OError from '@overleaf/o-error'
import metrics from '@overleaf/metrics'
+import Settings from '@overleaf/settings'
import _ from 'lodash'
import * as RedisManager from './RedisManager.js'
import * as UpdatesProcessor from './UpdatesProcessor.js'
@@ -37,6 +38,13 @@ export function flushIfOld(projectId, cutoffTime, callback) {
)
metrics.inc('flush-old-updates', 1, { status: 'flushed' })
return UpdatesProcessor.processUpdatesForProject(projectId, callback)
+ } else if (Settings.shortHistoryQueues.includes(projectId)) {
+ logger.debug(
+ { projectId, firstOpTimestamp, cutoffTime },
+ 'flushing project with short queue'
+ )
+ metrics.inc('flush-old-updates', 1, { status: 'short-queue' })
+ return UpdatesProcessor.processUpdatesForProject(projectId, callback)
} else {
metrics.inc('flush-old-updates', 1, { status: 'skipped' })
return callback()
@@ -106,7 +114,7 @@ export function flushOldOps(options, callback) {
return flushIfOld(projectId, cutoffTime, function (err) {
if (err != null) {
logger.warn(
- { projectId, flushErr: err },
+ { projectId, err },
'error flushing old project'
)
}
diff --git a/services/project-history/app/js/HealthChecker.js b/services/project-history/app/js/HealthChecker.js
index 74f1272e1e..c57f184aad 100644
--- a/services/project-history/app/js/HealthChecker.js
+++ b/services/project-history/app/js/HealthChecker.js
@@ -29,7 +29,6 @@ export function check(callback) {
OError.tag(err, 'error checking lock for health check', {
project_id: projectId,
})
- logger.err(err)
return cb(err)
} else if ((res != null ? res.statusCode : undefined) !== 200) {
return cb(new Error(`status code not 200, it's ${res.statusCode}`))
@@ -46,7 +45,6 @@ export function check(callback) {
OError.tag(err, 'error flushing for health check', {
project_id: projectId,
})
- logger.err(err)
return cb(err)
} else if ((res != null ? res.statusCode : undefined) !== 204) {
return cb(new Error(`status code not 204, it's ${res.statusCode}`))
@@ -63,7 +61,6 @@ export function check(callback) {
OError.tag(err, 'error getting updates for health check', {
project_id: projectId,
})
- logger.err(err)
return cb(err)
} else if ((res != null ? res.statusCode : undefined) !== 200) {
return cb(new Error(`status code not 200, it's ${res.statusCode}`))
diff --git a/services/project-history/app/js/HistoryStoreManager.js b/services/project-history/app/js/HistoryStoreManager.js
index 641eb0f2fd..38658bdf5b 100644
--- a/services/project-history/app/js/HistoryStoreManager.js
+++ b/services/project-history/app/js/HistoryStoreManager.js
@@ -17,6 +17,7 @@ import * as Errors from './Errors.js'
import * as LocalFileWriter from './LocalFileWriter.js'
import * as HashManager from './HashManager.js'
import * as HistoryBlobTranslator from './HistoryBlobTranslator.js'
+import { promisifyMultiResult } from '@overleaf/promise-utils'
const HTTP_REQUEST_TIMEOUT = Settings.overleaf.history.requestTimeout
@@ -34,7 +35,10 @@ class StringStream extends stream.Readable {
_mocks.getMostRecentChunk = (projectId, historyId, callback) => {
const path = `projects/${historyId}/latest/history`
logger.debug({ projectId, historyId }, 'getting chunk from history service')
- _requestChunk({ path, json: true }, callback)
+ _requestChunk({ path, json: true }, (err, chunk) => {
+ if (err) return callback(OError.tag(err))
+ callback(null, chunk)
+ })
}
/**
@@ -53,7 +57,10 @@ export function getChunkAtVersion(projectId, historyId, version, callback) {
{ projectId, historyId, version },
'getting chunk from history service for version'
)
- _requestChunk({ path, json: true }, callback)
+ _requestChunk({ path, json: true }, (err, chunk) => {
+ if (err) return callback(OError.tag(err))
+ callback(null, chunk)
+ })
}
export function getMostRecentVersion(projectId, historyId, callback) {
@@ -67,8 +74,10 @@ export function getMostRecentVersion(projectId, historyId, callback) {
_.sortBy(chunk.chunk.history.changes || [], x => x.timestamp)
)
// find the latest project and doc versions in the chunk
- _getLatestProjectVersion(projectId, chunk, (err1, projectVersion) =>
+ _getLatestProjectVersion(projectId, chunk, (err1, projectVersion) => {
+ if (err1) err1 = OError.tag(err1)
_getLatestV2DocVersions(projectId, chunk, (err2, v2DocVersions) => {
+ if (err2) err2 = OError.tag(err2)
// return the project and doc versions
const projectStructureAndDocVersions = {
project: projectVersion,
@@ -78,10 +87,36 @@ export function getMostRecentVersion(projectId, historyId, callback) {
err1 || err2,
mostRecentVersion,
projectStructureAndDocVersions,
- lastChange
+ lastChange,
+ chunk
)
})
- )
+ })
+ })
+}
+
+/**
+ * @param {string} projectId
+ * @param {string} historyId
+ * @param {Object} opts
+ * @param {boolean} [opts.readOnly]
+ * @param {(error: Error, rawChunk?: { startVersion: number, endVersion: number, endTimestamp: Date}) => void} callback
+ */
+export function getMostRecentVersionRaw(projectId, historyId, opts, callback) {
+ const path = `projects/${historyId}/latest/history/raw`
+ logger.debug(
+ { projectId, historyId },
+ 'getting raw chunk from history service'
+ )
+ const qs = opts.readOnly ? { readOnly: true } : {}
+ _requestHistoryService({ path, json: true, qs }, (err, body) => {
+ if (err) return callback(OError.tag(err))
+ const { startVersion, endVersion, endTimestamp } = body
+ callback(null, {
+ startVersion,
+ endVersion,
+ endTimestamp: new Date(endTimestamp),
+ })
})
}
@@ -95,7 +130,8 @@ function _requestChunk(options, callback) {
chunk.chunk == null ||
chunk.chunk.startVersion == null
) {
- return callback(new OError('unexpected response'))
+ const { path } = options
+ return callback(new OError('unexpected response', { path }))
}
callback(null, chunk)
})
@@ -103,28 +139,36 @@ function _requestChunk(options, callback) {
function _getLatestProjectVersion(projectId, chunk, callback) {
// find the initial project version
- let projectVersion =
- chunk.chunk.history.snapshot && chunk.chunk.history.snapshot.projectVersion
- // keep track of any errors
+ const projectVersionInSnapshot = chunk.chunk.history.snapshot?.projectVersion
+ let projectVersion = projectVersionInSnapshot
+ const chunkStartVersion = chunk.chunk.startVersion
+ // keep track of any first error
let error = null
// iterate over the changes in chunk to find the most recent project version
- for (const change of chunk.chunk.history.changes || []) {
- if (change.projectVersion != null) {
+ for (const [changeIdx, change] of (
+ chunk.chunk.history.changes || []
+ ).entries()) {
+ const projectVersionInChange = change.projectVersion
+ if (projectVersionInChange != null) {
if (
projectVersion != null &&
- Versions.lt(change.projectVersion, projectVersion)
+ Versions.lt(projectVersionInChange, projectVersion)
) {
- logger.warn(
- { projectId, chunk, projectVersion, change },
- 'project structure version out of order in chunk'
- )
if (!error) {
error = new Errors.OpsOutOfOrderError(
- 'project structure version out of order'
+ 'project structure version out of order',
+ {
+ projectId,
+ chunkStartVersion,
+ projectVersionInSnapshot,
+ changeIdx,
+ projectVersion,
+ projectVersionInChange,
+ }
)
}
} else {
- projectVersion = change.projectVersion
+ projectVersion = projectVersionInChange
}
}
}
@@ -150,16 +194,16 @@ function _getLatestV2DocVersions(projectId, chunk, callback) {
v2DocVersions[docId].v != null &&
Versions.lt(v, v2DocVersions[docId].v)
) {
- logger.warn(
- {
- projectId,
- docId,
- changeVersion: docInfo,
- previousVersion: v2DocVersions[docId],
- },
- 'doc version out of order in chunk'
- )
if (!error) {
+ logger.warn(
+ {
+ projectId,
+ docId,
+ changeVersion: docInfo,
+ previousVersion: v2DocVersions[docId],
+ },
+ 'doc version out of order in chunk'
+ )
error = new Errors.OpsOutOfOrderError('doc version out of order')
}
} else {
@@ -175,7 +219,10 @@ export function getProjectBlob(historyId, blobHash, callback) {
logger.debug({ historyId, blobHash }, 'getting blob from history service')
_requestHistoryService(
{ path: `projects/${historyId}/blobs/${blobHash}` },
- callback
+ (err, blob) => {
+ if (err) return callback(OError.tag(err))
+ callback(null, blob)
+ }
)
}
@@ -213,7 +260,7 @@ export function sendChanges(
method: 'POST',
json: changes,
},
- error => {
+ (error, response) => {
if (error) {
OError.tag(error, 'failed to send changes to v1', {
projectId,
@@ -223,10 +270,9 @@ export function sendChanges(
statusCode: error.statusCode,
body: error.body,
})
- logger.warn(error)
return callback(error)
}
- callback()
+ callback(null, { resyncNeeded: response?.resyncNeeded ?? false })
}
)
}
@@ -242,11 +288,15 @@ function createBlobFromString(historyId, data, fileId, callback) {
(fsPath, cb) => {
_createBlob(historyId, fsPath, cb)
},
- callback
+ (err, hash) => {
+ if (err) return callback(OError.tag(err))
+ callback(null, hash)
+ }
)
}
function _checkBlobExists(historyId, hash, callback) {
+ if (!hash) return callback(null, false)
const url = `${Settings.overleaf.history.host}/projects/${historyId}/blobs/${hash}`
fetchNothing(url, {
method: 'HEAD',
@@ -256,7 +306,7 @@ function _checkBlobExists(historyId, hash, callback) {
callback(null, true)
})
.catch(err => {
- if (err instanceof RequestFailedError) {
+ if (err instanceof RequestFailedError && err.response.status === 404) {
return callback(null, false)
}
callback(OError.tag(err), false)
@@ -294,7 +344,7 @@ export function createBlobForUpdate(projectId, historyId, update, callback) {
try {
ranges = HistoryBlobTranslator.createRangeBlobDataFromUpdate(update)
} catch (error) {
- return callback(error)
+ return callback(OError.tag(error))
}
createBlobFromString(
historyId,
@@ -302,7 +352,7 @@ export function createBlobForUpdate(projectId, historyId, update, callback) {
`project-${projectId}-doc-${update.doc}`,
(err, fileHash) => {
if (err) {
- return callback(err)
+ return callback(OError.tag(err))
}
if (ranges) {
createBlobFromString(
@@ -311,7 +361,7 @@ export function createBlobForUpdate(projectId, historyId, update, callback) {
`project-${projectId}-doc-${update.doc}-ranges`,
(err, rangesHash) => {
if (err) {
- return callback(err)
+ return callback(OError.tag(err))
}
logger.debug(
{ fileHash, rangesHash },
@@ -337,11 +387,14 @@ export function createBlobForUpdate(projectId, historyId, update, callback) {
)
_checkBlobExists(historyId, update.hash, (err, blobExists) => {
if (err) {
- logger.warn(
- { err, projectId, fileId, update },
- 'error checking whether blob exists, reading from filestore'
+ return callback(
+ new OError(
+ 'error checking whether blob exists',
+ { projectId, historyId, update },
+ err
+ )
)
- } else if (update.createdBlob && blobExists) {
+ } else if (blobExists) {
logger.debug(
{ projectId, fileId, update },
'Skipping blob creation as it has already been created'
@@ -359,6 +412,9 @@ export function createBlobForUpdate(projectId, historyId, update, callback) {
new OError('no filestore URL provided and blob was not created')
)
}
+ if (!Settings.apis.filestore.enabled) {
+ return callback(new OError('blocking filestore read', { update }))
+ }
fetchStream(filestoreURL, {
signal: AbortSignal.timeout(HTTP_REQUEST_TIMEOUT),
@@ -373,7 +429,7 @@ export function createBlobForUpdate(projectId, historyId, update, callback) {
},
(err, fileHash) => {
if (err) {
- return callback(err)
+ return callback(OError.tag(err))
}
if (update.hash && update.hash !== fileHash) {
logger.warn(
@@ -405,7 +461,7 @@ export function createBlobForUpdate(projectId, historyId, update, callback) {
},
(err, fileHash) => {
if (err) {
- return callback(err)
+ return callback(OError.tag(err))
}
logger.debug({ fileHash }, 'created empty blob for file')
callback(null, { file: fileHash })
@@ -478,7 +534,10 @@ export function initializeProject(historyId, callback) {
export function deleteProject(projectId, callback) {
_requestHistoryService(
{ method: 'DELETE', path: `projects/${projectId}` },
- callback
+ err => {
+ if (err) return callback(OError.tag(err))
+ callback(null)
+ }
)
}
@@ -553,21 +612,27 @@ function _requestHistoryService(options, callback) {
if (res.statusCode >= 200 && res.statusCode < 300) {
callback(null, body)
} else {
+ const { method, url, qs } = requestOptions
error = new OError(
- `history store a non-success status code: ${res.statusCode}`
+ `history store a non-success status code: ${res.statusCode}`,
+ { method, url, qs, statusCode: res.statusCode }
)
- error.statusCode = res.statusCode
- error.body = body
- logger.warn({ err: error }, error.message)
callback(error)
}
})
}
export const promises = {
+ /** @type {(projectId: string, historyId: string) => Promise<{chunk: import('overleaf-editor-core/lib/types.js').RawChunk}>} */
getMostRecentChunk: promisify(getMostRecentChunk),
getChunkAtVersion: promisify(getChunkAtVersion),
- getMostRecentVersion: promisify(getMostRecentVersion),
+ getMostRecentVersion: promisifyMultiResult(getMostRecentVersion, [
+ 'version',
+ 'projectStructureAndDocVersions',
+ 'lastChange',
+ 'mostRecentChunk',
+ ]),
+ getMostRecentVersionRaw: promisify(getMostRecentVersionRaw),
getProjectBlob: promisify(getProjectBlob),
getProjectBlobStream: promisify(getProjectBlobStream),
sendChanges: promisify(sendChanges),
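With `promisifyMultiResult`, the extra callback arguments of `getMostRecentVersion` surface as named properties on a single resolved object. A usage sketch of the promise API exported above (variable names illustrative):

const { version, projectStructureAndDocVersions, lastChange, mostRecentChunk } =
  await HistoryStoreManager.promises.getMostRecentVersion(projectId, historyId)

// The raw variant skips chunk parsing and only reports the version bounds.
const { startVersion, endVersion, endTimestamp } =
  await HistoryStoreManager.promises.getMostRecentVersionRaw(
    projectId,
    historyId,
    { readOnly: true }
  )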
diff --git a/services/project-history/app/js/HttpController.js b/services/project-history/app/js/HttpController.js
index d69585c29e..927248726f 100644
--- a/services/project-history/app/js/HttpController.js
+++ b/services/project-history/app/js/HttpController.js
@@ -238,22 +238,6 @@ export function getFileMetadataSnapshot(req, res, next) {
)
}
-export function getMostRecentChunk(req, res, next) {
- const { project_id: projectId } = req.params
- WebApiManager.getHistoryId(projectId, (error, historyId) => {
- if (error) return next(OError.tag(error))
-
- HistoryStoreManager.getMostRecentChunk(
- projectId,
- historyId,
- (err, data) => {
- if (err) return next(OError.tag(err))
- res.json(data)
- }
- )
- })
-}
-
export function getLatestSnapshot(req, res, next) {
const { project_id: projectId } = req.params
WebApiManager.getHistoryId(projectId, (error, historyId) => {
@@ -272,25 +256,6 @@ export function getLatestSnapshot(req, res, next) {
})
}
-export function getChangesSince(req, res, next) {
- const { project_id: projectId } = req.params
- const { since } = req.query
- WebApiManager.getHistoryId(projectId, (error, historyId) => {
- if (error) return next(OError.tag(error))
- SnapshotManager.getChangesSince(
- projectId,
- historyId,
- since,
- (error, changes) => {
- if (error != null) {
- return next(error)
- }
- res.json(changes.map(c => c.toRaw()))
- }
- )
- })
-}
-
export function getChangesInChunkSince(req, res, next) {
const { project_id: projectId } = req.params
const { since } = req.query
@@ -604,9 +569,7 @@ export function deleteProject(req, res, next) {
if (err) {
return next(err)
}
- // The third parameter to the following call is the error. Calling it
- // with null will remove any failure record for this project.
- ErrorRecorder.record(projectId, 0, null, err => {
+ ErrorRecorder.clearError(projectId, err => {
if (err) {
return next(err)
}
diff --git a/services/project-history/app/js/Metrics.js b/services/project-history/app/js/Metrics.js
new file mode 100644
index 0000000000..b51518963b
--- /dev/null
+++ b/services/project-history/app/js/Metrics.js
@@ -0,0 +1,15 @@
+// @ts-check
+
+import { prom } from '@overleaf/metrics'
+
+export const historyFlushDurationSeconds = new prom.Histogram({
+ name: 'history_flush_duration_seconds',
+ help: 'Duration of a history flush in seconds',
+ buckets: [0.05, 0.1, 0.2, 0.3, 0.5, 1, 2, 5, 10],
+})
+
+export const historyFlushQueueSize = new prom.Histogram({
+ name: 'history_flush_queue_size',
+ help: 'Size of the queue during history flushes',
+ buckets: prom.exponentialBuckets(1, 2, 10),
+})
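For reference, `prom.exponentialBuckets(1, 2, 10)` expands to ten buckets doubling from 1: [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]. A usage sketch of the two metrics as observed later in UpdatesProcessor (durations in seconds, queue size in number of updates):

Metrics.historyFlushDurationSeconds.observe((Date.now() - startTimeMs) / 1000)
Metrics.historyFlushQueueSize.observe(queueSize)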
diff --git a/services/project-history/app/js/RedisManager.js b/services/project-history/app/js/RedisManager.js
index ac3197daf2..2f79a10a91 100644
--- a/services/project-history/app/js/RedisManager.js
+++ b/services/project-history/app/js/RedisManager.js
@@ -136,6 +136,9 @@ async function getUpdatesInBatches(projectId, batchSize, runner) {
moreBatches = true
break
}
+ if (update.resyncProjectStructureOnly) {
+ update._raw = rawUpdate
+ }
rawUpdates.push(rawUpdate)
updates.push(update)
@@ -151,6 +154,26 @@ async function getUpdatesInBatches(projectId, batchSize, runner) {
}
}
+/**
+ * @param {string} projectId
+ * @param {ResyncProjectStructureUpdate} update
+ * @return {Promise}
+ */
+async function deleteAppliedDocUpdate(projectId, update) {
+ const raw = update._raw
+ // Delete the first occurrence of the update with LREM KEY COUNT
+ // VALUE by setting COUNT to 1 which 'removes COUNT elements equal to
+ // value moving from head to tail.'
+ //
+ // If COUNT is 0 the entire list would be searched which would block
+ // redis since it would be an O(N) operation where N is the length of
+ // the queue, in a multi of the batch size.
+ metrics.summary('redis.projectHistoryOps', raw.length, {
+ status: 'lrem',
+ })
+ await rclient.lrem(Keys.projectHistoryOps({ project_id: projectId }), 1, raw)
+}
+
async function deleteAppliedDocUpdates(projectId, updates) {
const multi = rclient.multi()
// Delete all the updates which have been applied (exact match)
@@ -160,7 +183,7 @@ async function deleteAppliedDocUpdates(projectId, updates) {
// value moving from head to tail.'
//
// If COUNT is 0 the entire list would be searched which would block
- // redis snce it would be an O(N) operation where N is the length of
+ // redis since it would be an O(N) operation where N is the length of
// the queue, in a multi of the batch size.
metrics.summary('redis.projectHistoryOps', update.length, {
status: 'lrem',
@@ -275,6 +298,26 @@ async function getFirstOpTimestamp(projectId) {
return firstOpTimestamp
}
+async function getFirstOpTimestamps(projectIds) {
+ const keys = projectIds.map(projectId =>
+ Keys.projectHistoryFirstOpTimestamp({ project_id: projectId })
+ )
+ const results = await rclient.mget(keys)
+ const timestamps = results.map(result => {
+ // convert stored time back to a numeric timestamp
+ const timestamp = parseInt(result, 10)
+
+ // check for invalid timestamp
+ if (isNaN(timestamp)) {
+ return null
+ }
+
+ // convert numeric timestamp to a date object
+ return new Date(timestamp)
+ })
+ return timestamps
+}
+
async function clearFirstOpTimestamp(projectId) {
const key = Keys.projectHistoryFirstOpTimestamp({ project_id: projectId })
await rclient.del(key)
@@ -334,6 +377,7 @@ const getProjectIdsWithHistoryOpsCountCb = callbackify(
)
const setFirstOpTimestampCb = callbackify(setFirstOpTimestamp)
const getFirstOpTimestampCb = callbackify(getFirstOpTimestamp)
+const getFirstOpTimestampsCb = callbackify(getFirstOpTimestamps)
const clearFirstOpTimestampCb = callbackify(clearFirstOpTimestamp)
const getProjectIdsWithFirstOpTimestampsCb = callbackify(
getProjectIdsWithFirstOpTimestamps
@@ -371,6 +415,7 @@ export {
getProjectIdsWithHistoryOpsCountCb as getProjectIdsWithHistoryOpsCount,
setFirstOpTimestampCb as setFirstOpTimestamp,
getFirstOpTimestampCb as getFirstOpTimestamp,
+ getFirstOpTimestampsCb as getFirstOpTimestamps,
clearFirstOpTimestampCb as clearFirstOpTimestamp,
getProjectIdsWithFirstOpTimestampsCb as getProjectIdsWithFirstOpTimestamps,
clearDanglingFirstOpTimestampCb as clearDanglingFirstOpTimestamp,
@@ -383,12 +428,14 @@ export const promises = {
countUnprocessedUpdates,
getRawUpdatesBatch,
deleteAppliedDocUpdates,
+ deleteAppliedDocUpdate,
destroyDocUpdatesQueue,
getUpdatesInBatches,
getProjectIdsWithHistoryOps,
getProjectIdsWithHistoryOpsCount,
setFirstOpTimestamp,
getFirstOpTimestamp,
+ getFirstOpTimestamps,
clearFirstOpTimestamp,
getProjectIdsWithFirstOpTimestamps,
clearDanglingFirstOpTimestamp,
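A short usage sketch of the two additions, via the promise API exported above (project ids illustrative):

// Remove exactly one already-applied queue entry. LREM with count = 1 scans
// head -> tail and stops at the first match; the update must carry the raw
// queue payload in update._raw, as set in getUpdatesInBatches for
// resyncProjectStructureOnly updates.
await RedisManager.promises.deleteAppliedDocUpdate(projectId, update)

// Batch-read first-op timestamps with a single MGET; missing or unparsable
// values map to null rather than an invalid Date.
const timestamps = await RedisManager.promises.getFirstOpTimestamps([idA, idB])
// => e.g. [new Date(1712345678901), null]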
diff --git a/services/project-history/app/js/RetryManager.js b/services/project-history/app/js/RetryManager.js
index 4ae6ce22fc..b146da29f9 100644
--- a/services/project-history/app/js/RetryManager.js
+++ b/services/project-history/app/js/RetryManager.js
@@ -73,11 +73,11 @@ function isTemporaryFailure(failure) {
return TEMPORARY_FAILURES.includes(failure.error)
}
-function isHardFailure(failure) {
+export function isHardFailure(failure) {
return HARD_FAILURES.includes(failure.error)
}
-function isFirstFailure(failure) {
+export function isFirstFailure(failure) {
return failure.attempts <= 1
}
@@ -147,7 +147,7 @@ async function resyncProject(projectId, options = {}) {
try {
if (!/^[0-9a-f]{24}$/.test(projectId)) {
logger.debug({ projectId }, 'clearing bad project id')
- await ErrorRecorder.promises.record(projectId, 0, null)
+ await ErrorRecorder.promises.clearError(projectId)
return
}
diff --git a/services/project-history/app/js/Router.js b/services/project-history/app/js/Router.js
index d7233a511b..ec9a4f0582 100644
--- a/services/project-history/app/js/Router.js
+++ b/services/project-history/app/js/Router.js
@@ -22,10 +22,6 @@ export function initialize(app) {
app.delete('/project/:project_id', HttpController.deleteProject)
app.get('/project/:project_id/snapshot', HttpController.getLatestSnapshot)
- app.get(
- '/project/:project_id/latest/history',
- HttpController.getMostRecentChunk
- )
app.get(
'/project/:project_id/diff',
@@ -61,16 +57,6 @@ export function initialize(app) {
HttpController.getUpdates
)
- app.get(
- '/project/:project_id/changes',
- validate({
- query: {
- since: Joi.number().integer().min(0),
- },
- }),
- HttpController.getChangesSince
- )
-
app.get(
'/project/:project_id/changes-in-chunk',
validate({
diff --git a/services/project-history/app/js/SnapshotManager.js b/services/project-history/app/js/SnapshotManager.js
index 4dce9673b2..ed316743cf 100644
--- a/services/project-history/app/js/SnapshotManager.js
+++ b/services/project-history/app/js/SnapshotManager.js
@@ -73,7 +73,11 @@ async function getRangesSnapshot(projectId, version, pathname) {
})
}
if (!file.isEditable()) {
- throw new Error('File is not editable')
+ // A binary file has no tracked changes or comments
+ return {
+ changes: [],
+ comments: [],
+ }
}
const historyId = await WebApiManager.promises.getHistoryId(projectId)
await file.load('eager', HistoryStoreManager.getBlobStore(historyId))
@@ -124,7 +128,6 @@ async function getRangesSnapshot(projectId, version, pathname) {
)
const docUpdaterCompatibleComments = []
for (const comment of comments) {
- trackedDeletionOffset = 0
let trackedDeletionIndex = 0
if (comment.ranges.length === 0) {
// Translate detached comments into zero length comments at position 0
@@ -138,69 +141,60 @@ async function getRangesSnapshot(projectId, version, pathname) {
})
continue
}
- for (const commentRange of comment.ranges) {
- let commentRangeContent = ''
- let offsetFromOverlappingRangeAtStart = 0
- while (
- trackedDeletionIndex < trackedDeletions.length &&
- trackedDeletions[trackedDeletionIndex].range.start <
- commentRange.start &&
- trackedDeletions[trackedDeletionIndex].range.end <= commentRange.start
- ) {
- // Skip over tracked deletions that are before the current comment range
- trackedDeletionOffset +=
- trackedDeletions[trackedDeletionIndex].range.length
- trackedDeletionIndex++
+
+ // Consider a multiple range comment as a single comment that joins all its
+ // ranges
+ const commentStart = comment.ranges[0].start
+ const commentEnd = comment.ranges[comment.ranges.length - 1].end
+
+ let commentContent = ''
+ // Docupdater position
+ let position = commentStart
+ while (trackedDeletions[trackedDeletionIndex]?.range.end <= commentStart) {
+ // Skip over tracked deletions that are before the current comment range
+ position -= trackedDeletions[trackedDeletionIndex].range.length
+ trackedDeletionIndex++
+ }
+
+ if (trackedDeletions[trackedDeletionIndex]?.range.start < commentStart) {
+ // There's overlap with a tracked deletion, move the position left and
+ // truncate the overlap
+ position -=
+ commentStart - trackedDeletions[trackedDeletionIndex].range.start
+ }
+
+ // Cursor in the history content
+ let cursor = commentStart
+ while (cursor < commentEnd) {
+ const trackedDeletion = trackedDeletions[trackedDeletionIndex]
+ if (!trackedDeletion || trackedDeletion.range.start >= commentEnd) {
+ // We've run out of relevant tracked changes
+ commentContent += content.slice(cursor, commentEnd)
+ break
+ }
+ if (trackedDeletion.range.start > cursor) {
+ // There's a gap between the current cursor and the tracked deletion
+ commentContent += content.slice(cursor, trackedDeletion.range.start)
}
- if (
- trackedDeletions[trackedDeletionIndex]?.range.start < commentRange.start
- ) {
- // There's overlap with a tracked deletion, move the position left and
- // truncate the overlap
- offsetFromOverlappingRangeAtStart =
- commentRange.start -
- trackedDeletions[trackedDeletionIndex].range.start
- }
-
- // The position of the comment in the document after tracked deletions
- const position =
- commentRange.start -
- trackedDeletionOffset -
- offsetFromOverlappingRangeAtStart
-
- let cursor = commentRange.start
- while (cursor < commentRange.end) {
- const trackedDeletion = trackedDeletions[trackedDeletionIndex]
- if (
- !trackedDeletion ||
- trackedDeletion.range.start >= commentRange.end
- ) {
- // We've run out of relevant tracked changes
- commentRangeContent += content.slice(cursor, commentRange.end)
- break
- }
- if (trackedDeletion.range.start > cursor) {
- // There's a gap between the current cursor and the tracked deletion
- commentRangeContent += content.slice(
- cursor,
- trackedDeletion.range.start
- )
- }
+ if (trackedDeletion.range.end <= commentEnd) {
// Skip to the end of the tracked delete
cursor = trackedDeletion.range.end
trackedDeletionIndex++
- trackedDeletionOffset += trackedDeletion.range.length
+ } else {
+ // We're done with that comment
+ break
}
- docUpdaterCompatibleComments.push({
- op: {
- p: position,
- c: commentRangeContent,
- t: comment.id,
- resolved: comment.resolved,
- },
- })
}
+ docUpdaterCompatibleComments.push({
+ op: {
+ p: position,
+ c: commentContent,
+ t: comment.id,
+ resolved: comment.resolved,
+ },
+ id: comment.id,
+ })
}
return {
@@ -287,8 +281,26 @@ async function _getSnapshotAtVersion(projectId, version) {
return snapshot
}
+/**
+ * @param {string} projectId
+ * @param {string} historyId
+ * @return {Promise<Record<string, import('overleaf-editor-core').File>>}
+ */
async function getLatestSnapshotFiles(projectId, historyId) {
- const { snapshot } = await getLatestSnapshot(projectId, historyId)
+ const data = await HistoryStoreManager.promises.getMostRecentChunk(
+ projectId,
+ historyId
+ )
+ return await getLatestSnapshotFilesForChunk(historyId, data)
+}
+
+/**
+ * @param {string} historyId
+ * @param {{chunk: import('overleaf-editor-core/lib/types.js').RawChunk}} chunk
+ * @return {Promise<Record<string, import('overleaf-editor-core').File>>}
+ */
+async function getLatestSnapshotFilesForChunk(historyId, chunk) {
+ const { snapshot } = getLatestSnapshotFromChunk(chunk)
const snapshotFiles = await snapshot.loadFiles(
'lazy',
HistoryStoreManager.getBlobStore(historyId)
@@ -296,11 +308,24 @@ async function getLatestSnapshotFiles(projectId, historyId) {
return snapshotFiles
}
+/**
+ * @param {string} projectId
+ * @param {string} historyId
+ * @return {Promise<{version: number, snapshot: import('overleaf-editor-core').Snapshot}>}
+ */
async function getLatestSnapshot(projectId, historyId) {
const data = await HistoryStoreManager.promises.getMostRecentChunk(
projectId,
historyId
)
+ return getLatestSnapshotFromChunk(data)
+}
+
+/**
+ * @param {{chunk: import('overleaf-editor-core/lib/types.js').RawChunk}} data
+ * @return {{version: number, snapshot: import('overleaf-editor-core').Snapshot}}
+ */
+function getLatestSnapshotFromChunk(data) {
if (data == null || data.chunk == null) {
throw new OError('undefined chunk')
}
@@ -316,44 +341,6 @@ async function getLatestSnapshot(projectId, historyId) {
}
}
-async function getChangesSince(projectId, historyId, sinceVersion) {
- const allChanges = []
- let nextVersion
- while (true) {
- let data
- if (nextVersion) {
- data = await HistoryStoreManager.promises.getChunkAtVersion(
- projectId,
- historyId,
- nextVersion
- )
- } else {
- data = await HistoryStoreManager.promises.getMostRecentChunk(
- projectId,
- historyId
- )
- }
- if (data == null || data.chunk == null) {
- throw new OError('undefined chunk')
- }
- const chunk = Core.Chunk.fromRaw(data.chunk)
- if (sinceVersion > chunk.getEndVersion()) {
- throw new OError('requested version past the end')
- }
- const changes = chunk.getChanges()
- if (chunk.getStartVersion() > sinceVersion) {
- allChanges.unshift(...changes)
- nextVersion = chunk.getStartVersion()
- } else {
- allChanges.unshift(
- ...changes.slice(sinceVersion - chunk.getStartVersion())
- )
- break
- }
- }
- return allChanges
-}
-
async function getChangesInChunkSince(projectId, historyId, sinceVersion) {
const latestChunk = Core.Chunk.fromRaw(
(
@@ -401,35 +388,38 @@ async function _loadFilesLimit(snapshot, kind, blobStore) {
// EXPORTS
-const getChangesSinceCb = callbackify(getChangesSince)
const getChangesInChunkSinceCb = callbackify(getChangesInChunkSince)
const getFileSnapshotStreamCb = callbackify(getFileSnapshotStream)
const getProjectSnapshotCb = callbackify(getProjectSnapshot)
const getLatestSnapshotCb = callbackify(getLatestSnapshot)
const getLatestSnapshotFilesCb = callbackify(getLatestSnapshotFiles)
+const getLatestSnapshotFilesForChunkCb = callbackify(
+ getLatestSnapshotFilesForChunk
+)
const getRangesSnapshotCb = callbackify(getRangesSnapshot)
const getFileMetadataSnapshotCb = callbackify(getFileMetadataSnapshot)
const getPathsAtVersionCb = callbackify(getPathsAtVersion)
export {
- getChangesSinceCb as getChangesSince,
+ getLatestSnapshotFromChunk,
getChangesInChunkSinceCb as getChangesInChunkSince,
getFileSnapshotStreamCb as getFileSnapshotStream,
getProjectSnapshotCb as getProjectSnapshot,
getFileMetadataSnapshotCb as getFileMetadataSnapshot,
getLatestSnapshotCb as getLatestSnapshot,
getLatestSnapshotFilesCb as getLatestSnapshotFiles,
+ getLatestSnapshotFilesForChunkCb as getLatestSnapshotFilesForChunk,
getRangesSnapshotCb as getRangesSnapshot,
getPathsAtVersionCb as getPathsAtVersion,
}
export const promises = {
- getChangesSince,
getChangesInChunkSince,
getFileSnapshotStream,
getProjectSnapshot,
getLatestSnapshot,
getLatestSnapshotFiles,
+ getLatestSnapshotFilesForChunk,
getRangesSnapshot,
getPathsAtVersion,
getFileMetadataSnapshot,
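A sketch of how the new chunk-based entry point composes with the existing helpers, so callers that already hold the most recent chunk avoid a second fetch:

// Fetch the chunk once and reuse it, instead of letting getLatestSnapshotFiles
// fetch it again internally.
const chunk = await HistoryStoreManager.promises.getMostRecentChunk(
  projectId,
  historyId
)
const snapshotFiles =
  await SnapshotManager.promises.getLatestSnapshotFilesForChunk(historyId, chunk)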
diff --git a/services/project-history/app/js/SyncManager.js b/services/project-history/app/js/SyncManager.js
index 0a596be393..43cb61be9f 100644
--- a/services/project-history/app/js/SyncManager.js
+++ b/services/project-history/app/js/SyncManager.js
@@ -8,7 +8,7 @@ import logger from '@overleaf/logger'
import Metrics from '@overleaf/metrics'
import OError from '@overleaf/o-error'
import { File, Range } from 'overleaf-editor-core'
-import { SyncError } from './Errors.js'
+import { NeedFullProjectStructureResyncError, SyncError } from './Errors.js'
import { db, ObjectId } from './mongodb.js'
import * as SnapshotManager from './SnapshotManager.js'
import * as LockManager from './LockManager.js'
@@ -23,6 +23,7 @@ import { isInsert, isDelete } from './Utils.js'
/**
* @import { Comment as HistoryComment, TrackedChange as HistoryTrackedChange } from 'overleaf-editor-core'
+ * @import { CommentRawData, TrackedChangeRawData } from 'overleaf-editor-core/lib/types'
* @import { Comment, Entity, ResyncDocContentUpdate, RetainOp, TrackedChange } from './types'
* @import { TrackedChangeTransition, TrackingDirective, TrackingType, Update } from './types'
* @import { ProjectStructureUpdate } from './types'
@@ -53,15 +54,13 @@ async function startResync(projectId, options = {}) {
await LockManager.promises.runWithLock(
keys.projectHistoryLock({ project_id: projectId }),
async extendLock => {
- await _startResyncWithoutLock(projectId, options)
+ await startResyncWithoutLock(projectId, options)
}
)
} catch (error) {
// record error in starting sync ("sync ongoing")
- try {
+ if (error instanceof Error) {
await ErrorRecorder.promises.record(projectId, -1, error)
- } catch (err) {
- // swallow any error thrown by ErrorRecorder.record()
}
throw error
}
@@ -76,17 +75,20 @@ async function startHardResync(projectId, options = {}) {
await clearResyncState(projectId)
await RedisManager.promises.clearFirstOpTimestamp(projectId)
await RedisManager.promises.destroyDocUpdatesQueue(projectId)
- await _startResyncWithoutLock(projectId, options)
+ await startResyncWithoutLock(projectId, options)
}
)
} catch (error) {
// record error in starting sync ("sync ongoing")
- await ErrorRecorder.promises.record(projectId, -1, error)
+ if (error instanceof Error) {
+ await ErrorRecorder.promises.record(projectId, -1, error)
+ }
throw error
}
}
-async function _startResyncWithoutLock(projectId, options) {
+// The caller must hold the lock and should record any errors via the ErrorRecorder.
+async function startResyncWithoutLock(projectId, options) {
await ErrorRecorder.promises.recordSyncStart(projectId)
const syncState = await _getResyncState(projectId)
@@ -100,6 +102,9 @@ async function _startResyncWithoutLock(projectId, options) {
if (options.historyRangesMigration) {
webOpts.historyRangesMigration = options.historyRangesMigration
}
+ if (options.resyncProjectStructureOnly) {
+ webOpts.resyncProjectStructureOnly = options.resyncProjectStructureOnly
+ }
await WebApiManager.promises.requestResync(projectId, webOpts)
await setResyncState(projectId, syncState)
}
@@ -156,6 +161,29 @@ async function clearResyncState(projectId) {
})
}
+/**
+ * @param {string} projectId
+ * @param {Date} date
+ * @return {Promise}
+ */
+async function clearResyncStateIfAllAfter(projectId, date) {
+ const rawSyncState = await db.projectHistorySyncState.findOne({
+ project_id: new ObjectId(projectId.toString()),
+ })
+ if (!rawSyncState) return // already cleared
+ const state = SyncState.fromRaw(projectId, rawSyncState)
+ if (state.isSyncOngoing()) return // new sync started
+ for (const { timestamp } of rawSyncState.history) {
+ if (timestamp < date) return // preserve old resync states
+ }
+ // expiresAt is cleared when starting a sync and bumped when making changes.
+ // Use expiresAt as read to ensure we only clear the confirmed state.
+ await db.projectHistorySyncState.deleteOne({
+ project_id: new ObjectId(projectId.toString()),
+ expiresAt: rawSyncState.expiresAt,
+ })
+}
+
async function skipUpdatesDuringSync(projectId, updates) {
const syncState = await _getResyncState(projectId)
if (!syncState.isSyncOngoing()) {
@@ -178,9 +206,18 @@ async function skipUpdatesDuringSync(projectId, updates) {
return { updates: filteredUpdates, syncState }
}
+/**
+ * @param {string} projectId
+ * @param {string} projectHistoryId
+ * @param {{chunk: import('overleaf-editor-core/lib/types.js').RawChunk}} mostRecentChunk
+ * @param {Array<Update>} updates
+ * @param {() => Promise<void>} extendLock
+ * @return {Promise<Array<Update>>}
+ */
async function expandSyncUpdates(
projectId,
projectHistoryId,
+ mostRecentChunk,
updates,
extendLock
) {
@@ -195,10 +232,11 @@ async function expandSyncUpdates(
const syncState = await _getResyncState(projectId)
// compute the current snapshot from the most recent chunk
- const snapshotFiles = await SnapshotManager.promises.getLatestSnapshotFiles(
- projectId,
- projectHistoryId
- )
+ const snapshotFiles =
+ await SnapshotManager.promises.getLatestSnapshotFilesForChunk(
+ projectHistoryId,
+ mostRecentChunk
+ )
// check if snapshot files are valid
const invalidFiles = _.pickBy(
@@ -271,8 +309,10 @@ class SyncState {
})
}
- for (const doc of update.resyncProjectStructure.docs) {
- this.startDocContentSync(doc.path)
+ if (!update.resyncProjectStructureOnly) {
+ for (const doc of update.resyncProjectStructure.docs) {
+ this.startDocContentSync(doc.path)
+ }
}
this.stopProjectStructureSync()
@@ -465,6 +505,28 @@ class SyncUpdateExpander {
persistedBinaryFiles
)
this.queueSetMetadataOpsForLinkedFiles(update)
+
+ if (update.resyncProjectStructureOnly) {
+ const docPaths = new Set()
+ for (const entity of update.resyncProjectStructure.docs) {
+ const path = UpdateTranslator._convertPathname(entity.path)
+ docPaths.add(path)
+ }
+ for (const expandedUpdate of this.expandedUpdates) {
+ if (docPaths.has(expandedUpdate.pathname)) {
+ // Clear the resync state and queue entry, we need to start over.
+ this.expandedUpdates = []
+ await clearResyncState(this.projectId)
+ await RedisManager.promises.deleteAppliedDocUpdate(
+ this.projectId,
+ update
+ )
+ throw new NeedFullProjectStructureResyncError(
+ 'aborting partial resync: touched doc'
+ )
+ }
+ }
+ }
} else if ('resyncDocContent' in update) {
logger.debug(
{ projectId: this.projectId, update },
@@ -530,9 +592,10 @@ class SyncUpdateExpander {
this.files[update.pathname] = File.fromString('')
} else {
update.file = entity.file
- update.url = entity.url
- update.hash = entity._hash
- update.metadata = entity.metadata
+ if (entity.url) update.url = entity.url
+ if (entity._hash) update.hash = entity._hash
+ if (entity.createdBlob) update.createdBlob = entity.createdBlob
+ if (entity.metadata) update.metadata = entity.metadata
}
this.expandedUpdates.push(update)
@@ -619,10 +682,11 @@ class SyncUpdateExpander {
ts: update.meta.ts,
},
file: entity.file,
- url: entity.url,
- hash: entity._hash,
- metadata: entity.metadata,
}
+ if (entity.url) addUpdate.url = entity.url
+ if (entity._hash) addUpdate.hash = entity._hash
+ if (entity.createdBlob) addUpdate.createdBlob = entity.createdBlob
+ if (entity.metadata) addUpdate.metadata = entity.metadata
this.expandedUpdates.push(addUpdate)
Metrics.inc('project_history_resync_operation', 1, {
status: 'update binary file contents',
@@ -701,11 +765,19 @@ class SyncUpdateExpander {
}
const persistedComments = file.getComments().toArray()
- await this.queueUpdatesForOutOfSyncComments(
- update,
- pathname,
- persistedComments
- )
+ if (update.resyncDocContent.historyOTRanges) {
+ this.queueUpdatesForOutOfSyncCommentsHistoryOT(
+ update,
+ pathname,
+ file.getComments().toRaw()
+ )
+ } else {
+ await this.queueUpdatesForOutOfSyncComments(
+ update,
+ pathname,
+ persistedComments
+ )
+ }
const persistedChanges = file.getTrackedChanges().asSorted()
await this.queueUpdatesForOutOfSyncTrackedChanges(
@@ -762,6 +834,91 @@ class SyncUpdateExpander {
return expandedUpdate
}
+ /**
+ * Queue updates for out of sync comments
+ *
+ * @param {ResyncDocContentUpdate} update
+ * @param {string} pathname
+ * @param {CommentRawData[]} persistedComments
+ */
+ queueUpdatesForOutOfSyncCommentsHistoryOT(
+ update,
+ pathname,
+ persistedComments
+ ) {
+ const expectedComments =
+ update.resyncDocContent.historyOTRanges?.comments ?? []
+ const expectedCommentsById = new Map(
+ expectedComments.map(comment => [comment.id, comment])
+ )
+ const persistedCommentsById = new Map(
+ persistedComments.map(comment => [comment.id, comment])
+ )
+
+ // Delete any persisted comment that is not in the expected comment list.
+ for (const persistedComment of persistedComments) {
+ if (!expectedCommentsById.has(persistedComment.id)) {
+ this.expandedUpdates.push({
+ doc: update.doc,
+ op: [{ deleteComment: persistedComment.id }],
+ meta: {
+ pathname,
+ resync: true,
+ origin: this.origin,
+ ts: update.meta.ts,
+ },
+ })
+ }
+ }
+
+ for (const expectedComment of expectedComments) {
+ const persistedComment = persistedCommentsById.get(expectedComment.id)
+ if (
+ persistedComment &&
+ commentRangesAreInSyncHistoryOT(persistedComment, expectedComment)
+ ) {
+ if (expectedComment.resolved === persistedComment.resolved) {
+ // Both comments are identical; do nothing
+ } else {
+ // Only the resolved state differs
+ this.expandedUpdates.push({
+ doc: update.doc,
+ op: [
+ {
+ commentId: expectedComment.id,
+ resolved: expectedComment.resolved,
+ },
+ ],
+ meta: {
+ pathname,
+ resync: true,
+ origin: this.origin,
+ ts: update.meta.ts,
+ },
+ })
+ }
+ } else {
+ // New comment or ranges differ
+ this.expandedUpdates.push({
+ doc: update.doc,
+ op: [
+ {
+ commentId: expectedComment.id,
+ ranges: expectedComment.ranges,
+ resolved: expectedComment.resolved,
+ },
+ ],
+ meta: {
+ pathname,
+ resync: true,
+ origin: this.origin,
+ ts: update.meta.ts,
+ },
+ })
+ }
+ }
+ }
+
/**
* Queue updates for out of sync comments
*
@@ -888,6 +1045,7 @@ class SyncUpdateExpander {
for (const transition of getTrackedChangesTransitions(
persistedChanges,
expectedChanges,
+ update.resyncDocContent.historyOTRanges?.trackedChanges || [],
expectedContent.length
)) {
if (transition.pos > cursor) {
@@ -955,6 +1113,25 @@ class SyncUpdateExpander {
}
}
+/**
+ * Compares the ranges in the persisted and expected comments
+ *
+ * @param {CommentRawData} persistedComment
+ * @param {CommentRawData} expectedComment
+ */
+function commentRangesAreInSyncHistoryOT(persistedComment, expectedComment) {
+ if (persistedComment.ranges.length !== expectedComment.ranges.length) {
+ return false
+ }
+ for (let i = 0; i < persistedComment.ranges.length; i++) {
+ const persistedRange = persistedComment.ranges[i]
+ const expectedRange = expectedComment.ranges[i]
+ if (persistedRange.pos !== expectedRange.pos) return false
+ if (persistedRange.length !== expectedRange.length) return false
+ }
+ return true
+}
+
/**
* Compares the ranges in the persisted and expected comments
*
@@ -986,11 +1163,13 @@ function commentRangesAreInSync(persistedComment, expectedComment) {
*
* @param {readonly HistoryTrackedChange[]} persistedChanges
* @param {TrackedChange[]} expectedChanges
+ * @param {TrackedChangeRawData[]} persistedChangesHistoryOT
* @param {number} docLength
*/
function getTrackedChangesTransitions(
persistedChanges,
expectedChanges,
+ persistedChangesHistoryOT,
docLength
) {
/** @type {TrackedChangeTransition[]} */
@@ -1013,6 +1192,19 @@ function getTrackedChangesTransitions(
})
}
+ for (const change of persistedChangesHistoryOT) {
+ transitions.push({
+ stage: 'expected',
+ pos: change.range.pos,
+ tracking: change.tracking,
+ })
+ transitions.push({
+ stage: 'expected',
+ pos: change.range.pos + change.range.length,
+ tracking: { type: 'none' },
+ })
+ }
+
for (const change of expectedChanges) {
const op = change.op
const pos = op.hpos ?? op.p
@@ -1093,6 +1285,7 @@ function trackingDirectivesEqual(a, b) {
// EXPORTS
const startResyncCb = callbackify(startResync)
+const startResyncWithoutLockCb = callbackify(startResyncWithoutLock)
const startHardResyncCb = callbackify(startHardResync)
const setResyncStateCb = callbackify(setResyncState)
const clearResyncStateCb = callbackify(clearResyncState)
@@ -1100,15 +1293,31 @@ const skipUpdatesDuringSyncCb = callbackifyMultiResult(skipUpdatesDuringSync, [
'updates',
'syncState',
])
+
+/**
+ * @param {string} projectId
+ * @param {string} projectHistoryId
+ * @param {{chunk: import('overleaf-editor-core/lib/types.js').RawChunk}} mostRecentChunk
+ * @param {Array} updates
+ * @param {() => void} extendLock
+ * @param {(err: Error | null, updates?: Array<Update>) => void} callback
+ */
const expandSyncUpdatesCb = (
projectId,
projectHistoryId,
+ mostRecentChunk,
updates,
extendLock,
callback
) => {
const extendLockPromises = promisify(extendLock)
- expandSyncUpdates(projectId, projectHistoryId, updates, extendLockPromises)
+ expandSyncUpdates(
+ projectId,
+ projectHistoryId,
+ mostRecentChunk,
+ updates,
+ extendLockPromises
+ )
.then(result => {
callback(null, result)
})
@@ -1119,6 +1328,7 @@ const expandSyncUpdatesCb = (
export {
startResyncCb as startResync,
+ startResyncWithoutLockCb as startResyncWithoutLock,
startHardResyncCb as startHardResync,
setResyncStateCb as setResyncState,
clearResyncStateCb as clearResyncState,
@@ -1128,9 +1338,11 @@ export {
export const promises = {
startResync,
+ startResyncWithoutLock,
startHardResync,
setResyncState,
clearResyncState,
+ clearResyncStateIfAllAfter,
skipUpdatesDuringSync,
expandSyncUpdates,
}
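A usage sketch of the structure-only resync path added above. If a queued structure-only update turns out to touch a doc, `SyncUpdateExpander` clears the resync state, drops the queue entry and throws `NeedFullProjectStructureResyncError`, so the caller can fall back to a full resync:

// Request a resync of the project structure only; per-doc content syncs are
// skipped because SyncState does not start doc content sync in this mode.
await SyncManager.promises.startResync(projectId, {
  resyncProjectStructureOnly: true,
})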
diff --git a/services/project-history/app/js/UpdateCompressor.js b/services/project-history/app/js/UpdateCompressor.js
index c6ab91f959..5ae7591a7f 100644
--- a/services/project-history/app/js/UpdateCompressor.js
+++ b/services/project-history/app/js/UpdateCompressor.js
@@ -1,7 +1,15 @@
// @ts-check
+import Metrics from '@overleaf/metrics'
import OError from '@overleaf/o-error'
import DMP from 'diff-match-patch'
+import { EditOperationBuilder } from 'overleaf-editor-core'
+import zlib from 'node:zlib'
+import { ReadableString, WritableBuffer } from '@overleaf/stream-utils'
+import Stream from 'node:stream'
+import logger from '@overleaf/logger'
+import { callbackify } from '@overleaf/promise-utils'
+import Settings from '@overleaf/settings'
/**
* @import { DeleteOp, InsertOp, Op, Update } from './types'
@@ -29,11 +37,16 @@ const cloneWithOp = function (update, op) {
return update
}
const mergeUpdatesWithOp = function (firstUpdate, secondUpdate, op) {
- // We want to take doc_length and ts from the firstUpdate, v from the second
+ // We want to take doc_length and ts from the firstUpdate, v and doc_hash from the second
const update = cloneWithOp(firstUpdate, op)
if (secondUpdate.v != null) {
update.v = secondUpdate.v
}
+ if (secondUpdate.meta.doc_hash != null) {
+ update.meta.doc_hash = secondUpdate.meta.doc_hash
+ } else {
+ delete update.meta.doc_hash
+ }
return update
}
@@ -112,8 +125,11 @@ export function convertToSingleOpUpdates(updates) {
if (docLength === -1) {
docLength = 0
}
+ const docHash = update.meta.doc_hash
for (const op of ops) {
const splitUpdate = cloneWithOp(update, op)
+ // Only the last update will keep the doc_hash property
+ delete splitUpdate.meta.doc_hash
if (docLength != null) {
splitUpdate.meta.doc_length = docLength
docLength = adjustLengthByOp(docLength, op, {
@@ -123,6 +139,9 @@ export function convertToSingleOpUpdates(updates) {
}
splitUpdates.push(splitUpdate)
}
+ if (docHash != null && splitUpdates.length > 0) {
+ splitUpdates[splitUpdates.length - 1].meta.doc_hash = docHash
+ }
}
return splitUpdates
}
@@ -150,9 +169,16 @@ export function concatUpdatesWithSameVersion(updates) {
lastUpdate.op != null &&
lastUpdate.v === update.v &&
lastUpdate.doc === update.doc &&
- lastUpdate.pathname === update.pathname
+ lastUpdate.pathname === update.pathname &&
+ EditOperationBuilder.isValid(update.op[0]) ===
+ EditOperationBuilder.isValid(lastUpdate.op[0])
) {
lastUpdate.op = lastUpdate.op.concat(update.op)
+ if (update.meta.doc_hash == null) {
+ delete lastUpdate.meta.doc_hash
+ } else {
+ lastUpdate.meta.doc_hash = update.meta.doc_hash
+ }
} else {
concattedUpdates.push(update)
}
@@ -163,6 +189,66 @@ export function concatUpdatesWithSameVersion(updates) {
return concattedUpdates
}
+async function estimateStorage(updates) {
+ const blob = JSON.stringify(updates)
+ const bytes = Buffer.from(blob).byteLength
+ const read = new ReadableString(blob)
+ const compress = zlib.createGzip()
+ const write = new WritableBuffer()
+ await Stream.promises.pipeline(read, compress, write)
+ const bytesGz = write.size()
+ return { bytes, bytesGz, nUpdates: updates.length }
+}
+
+/**
+ * @param {Update[]} rawUpdates
+ * @param {string} projectId
+ * @param {import("./Profiler").Profiler} profile
+ * @return {Promise}
+ */
+async function compressRawUpdatesWithMetrics(rawUpdates, projectId, profile) {
+ if (100 * Math.random() > Settings.estimateCompressionSample) {
+ return compressRawUpdatesWithProfile(rawUpdates, projectId, profile)
+ }
+ const before = await estimateStorage(rawUpdates)
+ profile.log('estimateRawUpdatesSize')
+ const updates = compressRawUpdatesWithProfile(rawUpdates, projectId, profile)
+ const after = await estimateStorage(updates)
+ for (const [path, values] of Object.entries({ before, after })) {
+ for (const [method, v] of Object.entries(values)) {
+ Metrics.summary('updates_compression_estimate', v, { path, method })
+ }
+ }
+ for (const method of Object.keys(before)) {
+ const percentage = Math.ceil(100 * (after[method] / before[method]))
+ Metrics.summary('updates_compression_percentage', percentage, { method })
+ }
+ profile.log('estimateCompressedUpdatesSize')
+ return updates
+}
+
+export const compressRawUpdatesWithMetricsCb = callbackify(
+ compressRawUpdatesWithMetrics
+)
+
+/**
+ * @param {Update[]} rawUpdates
+ * @param {string} projectId
+ * @param {import("./Profiler").Profiler} profile
+ * @return {Update[]}
+ */
+function compressRawUpdatesWithProfile(rawUpdates, projectId, profile) {
+ const updates = compressRawUpdates(rawUpdates)
+ const timeTaken = profile.log('compressRawUpdates').getTimeDelta()
+ if (timeTaken >= 1000) {
+ logger.debug(
+ { projectId, updates: rawUpdates, timeTaken },
+ 'slow compression of raw updates'
+ )
+ }
+ return updates
+}
+
export function compressRawUpdates(rawUpdates) {
let updates = convertToSingleOpUpdates(rawUpdates)
updates = compressUpdates(updates)
@@ -214,6 +300,13 @@ function _concatTwoUpdates(firstUpdate, secondUpdate) {
return [firstUpdate, secondUpdate]
}
+ const firstUpdateIsHistoryOT = EditOperationBuilder.isValid(firstUpdate.op)
+ const secondUpdateIsHistoryOT = EditOperationBuilder.isValid(secondUpdate.op)
+ if (firstUpdateIsHistoryOT !== secondUpdateIsHistoryOT) {
+ // cannot merge mix of sharejs-text-op and history-ot, should not happen.
+ return [firstUpdate, secondUpdate]
+ }
+
if (
firstUpdate.doc !== secondUpdate.doc ||
firstUpdate.pathname !== secondUpdate.pathname
@@ -260,6 +353,15 @@ function _concatTwoUpdates(firstUpdate, secondUpdate) {
return [firstUpdate, secondUpdate]
}
+ if (firstUpdateIsHistoryOT && secondUpdateIsHistoryOT) {
+ const op1 = EditOperationBuilder.fromJSON(firstUpdate.op)
+ const op2 = EditOperationBuilder.fromJSON(secondUpdate.op)
+ if (!op1.canBeComposedWith(op2)) return [firstUpdate, secondUpdate]
+ return [
+ mergeUpdatesWithOp(firstUpdate, secondUpdate, op1.compose(op2).toJSON()),
+ ]
+ }
+
if (
firstUpdate.op.trackedDeleteRejection ||
secondUpdate.op.trackedDeleteRejection
@@ -381,9 +483,16 @@ function _concatTwoUpdates(firstUpdate, secondUpdate) {
// Make sure that commentIds metadata is propagated to inserts
op.commentIds = secondOp.commentIds
}
- return mergeUpdatesWithOp(firstUpdate, secondUpdate, op)
+ const update = mergeUpdatesWithOp(firstUpdate, secondUpdate, op)
+ // Set the doc hash only on the last update
+ delete update.meta.doc_hash
+ return update
}
)
+ const docHash = secondUpdate.meta.doc_hash
+ if (docHash != null && diffUpdates.length > 0) {
+ diffUpdates[diffUpdates.length - 1].meta.doc_hash = docHash
+ }
// Doing a diff like this loses track of the doc lengths for each
// update, so recalculate them
@@ -417,8 +526,7 @@ export function diffAsShareJsOps(before, after) {
const ops = []
let position = 0
for (const diff of diffs) {
- const type = diff[0]
- const content = diff[1]
+ const [type, content] = diff
if (type === ADDED) {
ops.push({
i: content,
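A note on the sampling idiom in `compressRawUpdatesWithMetrics` above: `estimateCompressionSample` is read as a percentage, so only that fraction of flushes pays for the extra gzip pass (the setting name is taken from the patch; its value is assumed to come from service config):

// With estimateCompressionSample = 5, 100 * Math.random() is uniform in
// [0, 100), so `100 * Math.random() > 5` skips the estimate ~95% of the time
// and roughly 5% of flushes record the before/after size metrics.
const sampled = 100 * Math.random() <= Settings.estimateCompressionSample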
diff --git a/services/project-history/app/js/UpdateTranslator.js b/services/project-history/app/js/UpdateTranslator.js
index 82a1de05ff..43b9f48270 100644
--- a/services/project-history/app/js/UpdateTranslator.js
+++ b/services/project-history/app/js/UpdateTranslator.js
@@ -7,7 +7,7 @@ import * as OperationsCompressor from './OperationsCompressor.js'
import { isInsert, isRetain, isDelete, isComment } from './Utils.js'
/**
- * @import { AddDocUpdate, AddFileUpdate, DeleteCommentUpdate, Op, RawScanOp } from './types'
+ * @import { AddDocUpdate, AddFileUpdate, DeleteCommentUpdate, HistoryOTEditOperationUpdate, Op, RawScanOp } from './types'
* @import { RenameUpdate, TextUpdate, TrackingDirective, TrackingProps } from './types'
* @import { SetCommentStateUpdate, SetFileMetadataOperation, Update, UpdateWithBlob } from './types'
*/
@@ -60,6 +60,16 @@ function _convertToChange(projectId, updateWithBlob) {
}
operations = [op]
projectVersion = update.version
+ } else if (isHistoryOTEditOperationUpdate(update)) {
+ let { pathname } = update.meta
+ pathname = _convertPathname(pathname)
+ if (update.v != null) {
+ v2DocVersions[update.doc] = { pathname, v: update.v }
+ }
+ operations = update.op.map(op => {
+ // Turn EditOperation into EditFileOperation by adding the pathname field.
+ return { pathname, ...op }
+ })
} else if (isTextUpdate(update)) {
const docLength = update.meta.history_doc_length ?? update.meta.doc_length
let pathname = update.meta.pathname
@@ -70,6 +80,12 @@ function _convertToChange(projectId, updateWithBlob) {
for (const op of update.op) {
builder.addOp(op, update)
}
+ // add doc hash if present
+ if (update.meta.doc_hash != null) {
+ // This will commit the text operation that the builder is currently
+ // building and set the contentHash property.
+ builder.commitTextOperation({ contentHash: update.meta.doc_hash })
+ }
operations = builder.finish()
// add doc version information if present
if (update.v != null) {
@@ -188,6 +204,22 @@ export function isTextUpdate(update) {
)
}
+/**
+ * @param {Update} update
+ * @returns {update is HistoryOTEditOperationUpdate}
+ */
+export function isHistoryOTEditOperationUpdate(update) {
+ return (
+ 'doc' in update &&
+ update.doc != null &&
+ 'op' in update &&
+ update.op != null &&
+ 'pathname' in update.meta &&
+ update.meta.pathname != null &&
+ Core.EditOperationBuilder.isValid(update.op[0])
+ )
+}
+
export function isProjectStructureUpdate(update) {
return isAddUpdate(update) || _isRenameUpdate(update)
}
@@ -285,8 +317,8 @@ class OperationsBuilder {
const pos = Math.min(op.hpos ?? op.p, this.docLength)
if (isComment(op)) {
- // Close the current text operation
- this.pushTextOperation()
+ // Commit the current text operation
+ this.commitTextOperation()
// Add a comment operation
const commentLength = op.hlen ?? op.c.length
@@ -307,7 +339,7 @@ class OperationsBuilder {
}
if (pos < this.cursor) {
- this.pushTextOperation()
+ this.commitTextOperation()
// At this point, this.cursor === 0 and we can continue
}
@@ -450,23 +482,32 @@ class OperationsBuilder {
this.docLength -= length
}
- pushTextOperation() {
- if (this.textOperation.length > 0)
- if (this.cursor < this.docLength) {
- this.retain(this.docLength - this.cursor)
- }
+ /**
+ * Finalize the current text operation and push it to the queue
+ *
+ * @param {object} [opts]
+ * @param {string} [opts.contentHash]
+ */
+ commitTextOperation(opts = {}) {
+ if (this.textOperation.length > 0 && this.cursor < this.docLength) {
+ this.retain(this.docLength - this.cursor)
+ }
if (this.textOperation.length > 0) {
- this.operations.push({
+ const operation = {
pathname: this.pathname,
textOperation: this.textOperation,
- })
+ }
+ if (opts.contentHash != null) {
+ operation.contentHash = opts.contentHash
+ }
+ this.operations.push(operation)
this.textOperation = []
}
this.cursor = 0
}
finish() {
- this.pushTextOperation()
+ this.commitTextOperation()
return this.operations
}
}
diff --git a/services/project-history/app/js/UpdatesProcessor.js b/services/project-history/app/js/UpdatesProcessor.js
index df9ccf11ea..b4895c012d 100644
--- a/services/project-history/app/js/UpdatesProcessor.js
+++ b/services/project-history/app/js/UpdatesProcessor.js
@@ -15,6 +15,8 @@ import * as WebApiManager from './WebApiManager.js'
import * as SyncManager from './SyncManager.js'
import * as Versions from './Versions.js'
import * as Errors from './Errors.js'
+import * as Metrics from './Metrics.js'
+import * as RetryManager from './RetryManager.js'
import { Profiler } from './Profiler.js'
const keys = Settings.redis.lock.key_schema
@@ -59,8 +61,67 @@ export function getRawUpdates(projectId, batchSize, callback) {
})
}
+// Trigger resync and start processing under lock to avoid other operations to
+// flush the resync updates.
+export function startResyncAndProcessUpdatesUnderLock(
+ projectId,
+ opts,
+ callback
+) {
+ const startTimeMs = Date.now()
+ LockManager.runWithLock(
+ keys.projectHistoryLock({ project_id: projectId }),
+ (extendLock, releaseLock) => {
+ SyncManager.startResyncWithoutLock(projectId, opts, err => {
+ if (err) return callback(OError.tag(err))
+ extendLock(err => {
+ if (err) return callback(OError.tag(err))
+ _countAndProcessUpdates(
+ projectId,
+ extendLock,
+ REDIS_READ_BATCH_SIZE,
+ releaseLock
+ )
+ })
+ })
+ },
+ (flushError, { queueSize } = {}) => {
+ if (flushError) {
+ OError.tag(flushError)
+ ErrorRecorder.record(projectId, queueSize, flushError, recordError => {
+ if (recordError) {
+ logger.error(
+ { err: recordError, projectId },
+ 'failed to record error'
+ )
+ }
+ callback(flushError)
+ })
+ } else {
+ ErrorRecorder.clearError(projectId, clearError => {
+ if (clearError) {
+ logger.error(
+ { err: clearError, projectId },
+ 'failed to clear error'
+ )
+ }
+ callback()
+ })
+ }
+ if (queueSize > 0) {
+ const duration = (Date.now() - startTimeMs) / 1000
+ Metrics.historyFlushDurationSeconds.observe(duration)
+ Metrics.historyFlushQueueSize.observe(queueSize)
+ }
+ // clear the timestamp in the background if the queue is now empty
+ RedisManager.clearDanglingFirstOpTimestamp(projectId, () => {})
+ }
+ )
+}
+
// Process all updates for a project, only check project-level information once
export function processUpdatesForProject(projectId, callback) {
+ const startTimeMs = Date.now()
LockManager.runWithLock(
keys.projectHistoryLock({ project_id: projectId }),
(extendLock, releaseLock) => {
@@ -71,17 +132,114 @@ export function processUpdatesForProject(projectId, callback) {
releaseLock
)
},
- (error, queueSize) => {
- if (error) {
- OError.tag(error)
+ (flushError, { queueSize, resyncNeeded } = {}) => {
+ if (flushError) {
+ OError.tag(flushError)
+ ErrorRecorder.record(
+ projectId,
+ queueSize,
+ flushError,
+ (recordError, failure) => {
+ if (recordError) {
+ logger.error(
+ { err: recordError, projectId },
+ 'failed to record error'
+ )
+ callback(recordError)
+ } else if (
+ RetryManager.isFirstFailure(failure) &&
+ RetryManager.isHardFailure(failure)
+ ) {
+ // This is the first failed flush since the last successful flush.
+ // Immediately attempt a resync.
+ logger.warn({ projectId }, 'Flush failed, attempting resync')
+ resyncProject(projectId, callback)
+ } else {
+ callback(flushError)
+ }
+ }
+ )
+ } else {
+ ErrorRecorder.clearError(projectId, clearError => {
+ if (clearError) {
+ logger.error(
+ { err: clearError, projectId },
+ 'failed to clear error'
+ )
+ }
+ if (resyncNeeded) {
+ logger.warn(
+ { projectId },
+ 'Resyncing project as requested by full project history'
+ )
+ resyncProject(projectId, callback)
+ } else {
+ callback()
+ }
+ })
+ }
+ if (queueSize > 0) {
+ const duration = (Date.now() - startTimeMs) / 1000
+ Metrics.historyFlushDurationSeconds.observe(duration)
+ Metrics.historyFlushQueueSize.observe(queueSize)
}
- ErrorRecorder.record(projectId, queueSize, error, callback)
// clear the timestamp in the background if the queue is now empty
RedisManager.clearDanglingFirstOpTimestamp(projectId, () => {})
}
)
}
+export function resyncProject(projectId, callback) {
+ SyncManager.startHardResync(projectId, {}, error => {
+ if (error != null) {
+ return callback(OError.tag(error))
+ }
+ // Flush the sync operations; this will not loop indefinitely
+ // because any failure won't be the first failure anymore.
+ LockManager.runWithLock(
+ keys.projectHistoryLock({ project_id: projectId }),
+ (extendLock, releaseLock) => {
+ _countAndProcessUpdates(
+ projectId,
+ extendLock,
+ REDIS_READ_BATCH_SIZE,
+ releaseLock
+ )
+ },
+ (flushError, { queueSize } = {}) => {
+ if (flushError) {
+ ErrorRecorder.record(
+ projectId,
+ queueSize,
+ flushError,
+ (recordError, failure) => {
+ if (recordError) {
+ logger.error(
+ { err: recordError, projectId },
+ 'failed to record error'
+ )
+ callback(OError.tag(recordError))
+ } else {
+ callback(OError.tag(flushError))
+ }
+ }
+ )
+ } else {
+ ErrorRecorder.clearError(projectId, clearError => {
+ if (clearError) {
+ logger.error(
+ { err: clearError, projectId },
+ 'failed to clear error'
+ )
+ }
+ callback()
+ })
+ }
+ }
+ )
+ })
+}
+
export function processUpdatesForProjectUsingBisect(
projectId,
amountToProcess,
@@ -97,21 +255,29 @@ export function processUpdatesForProjectUsingBisect(
releaseLock
)
},
- (error, queueSize) => {
+ (flushError, { queueSize } = {}) => {
if (amountToProcess === 0 || queueSize === 0) {
// no further processing possible
- if (error != null) {
+ if (flushError != null) {
ErrorRecorder.record(
projectId,
queueSize,
- OError.tag(error),
- callback
+ OError.tag(flushError),
+ recordError => {
+ if (recordError) {
+ logger.error(
+ { err: recordError, projectId },
+ 'failed to record error'
+ )
+ }
+ callback(flushError)
+ }
)
} else {
callback()
}
} else {
- if (error != null) {
+ if (flushError != null) {
// decrease the batch size when we hit an error
processUpdatesForProjectUsingBisect(
projectId,
@@ -140,13 +306,31 @@ export function processSingleUpdateForProject(projectId, callback) {
) => {
_countAndProcessUpdates(projectId, extendLock, 1, releaseLock)
},
- (
- error,
- queueSize // no need to clear the flush marker when single stepping
- ) => {
+ (flushError, { queueSize } = {}) => {
+ // no need to clear the flush marker when single stepping
// it will be cleared up on the next background flush if
// the queue is empty
- ErrorRecorder.record(projectId, queueSize, error, callback)
+ if (flushError) {
+ ErrorRecorder.record(projectId, queueSize, flushError, recordError => {
+ if (recordError) {
+ logger.error(
+ { err: recordError, projectId },
+ 'failed to record error'
+ )
+ }
+ callback(flushError)
+ })
+ } else {
+ ErrorRecorder.clearError(projectId, clearError => {
+ if (clearError) {
+ logger.error(
+ { err: clearError, projectId },
+ 'failed to clear error'
+ )
+ }
+ callback()
+ })
+ }
}
)
}
@@ -163,18 +347,34 @@ _mocks._countAndProcessUpdates = (
}
if (queueSize > 0) {
logger.debug({ projectId, queueSize }, 'processing uncompressed updates')
+
+ let resyncNeeded = false
RedisManager.getUpdatesInBatches(
projectId,
batchSize,
(updates, cb) => {
- _processUpdatesBatch(projectId, updates, extendLock, cb)
+ _processUpdatesBatch(
+ projectId,
+ updates,
+ extendLock,
+ (err, flushResponse) => {
+ if (err) {
+ return cb(err)
+ }
+
+ if (flushResponse.resyncNeeded) {
+ resyncNeeded = true
+ }
+ cb()
+ }
+ )
},
error => {
// Unconventional callback signature. The caller needs the queue size
// even when an error is thrown in order to record the queue size in
// the projectHistoryFailures collection. We'll have to find another
// way to achieve this when we promisify.
- callback(error, queueSize)
+ callback(error, { queueSize, resyncNeeded })
}
)
} else {
@@ -200,15 +400,21 @@ function _processUpdatesBatch(projectId, updates, extendLock, callback) {
{ projectId },
'discarding updates as project does not use history'
)
- return callback()
+ return callback(null, {})
}
- _processUpdates(projectId, historyId, updates, extendLock, error => {
- if (error != null) {
- return callback(OError.tag(error))
+ _processUpdates(
+ projectId,
+ historyId,
+ updates,
+ extendLock,
+ (error, flushResponse) => {
+ if (error != null) {
+ return callback(OError.tag(error))
+ }
+ callback(null, flushResponse)
}
- callback()
- })
+ )
})
}
@@ -222,17 +428,12 @@ export function _getHistoryId(projectId, updates, callback) {
idFromUpdates = update.projectHistoryId.toString()
} else if (idFromUpdates !== update.projectHistoryId.toString()) {
metrics.inc('updates.batches.project-history-id.inconsistent-update')
- logger.warn(
- {
+ return callback(
+ new OError('inconsistent project history id between updates', {
projectId,
- updates,
idFromUpdates,
currentId: update.projectHistoryId,
- },
- 'inconsistent project history id between updates'
- )
- return callback(
- new OError('inconsistent project history id between updates')
+ })
)
}
}
@@ -293,15 +494,11 @@ function _handleOpsOutOfOrderError(projectId, projectHistoryId, err, ...rest) {
// Bypass ops-out-of-order errors in the stored chunk when in forceDebug mode
if (failureRecord != null && failureRecord.forceDebug === true) {
logger.warn(
- { projectId, projectHistoryId },
+ { err, projectId, projectHistoryId },
'ops out of order in chunk, forced continue'
)
callback(null, ...results) // return results without error
} else {
- logger.warn(
- { projectId, projectHistoryId },
- 'ops out of order in chunk, returning error'
- )
callback(err, ...results)
}
})
@@ -349,13 +546,22 @@ export function _processUpdates(
}
if (filteredUpdates.length === 0) {
// return early if there are no updates to apply
- return SyncManager.setResyncState(projectId, newSyncState, callback)
+ return SyncManager.setResyncState(projectId, newSyncState, err => {
+ if (err) return callback(err)
+ callback(null, { resyncNeeded: false })
+ })
}
// only make request to history service if we have actual updates to process
_getMostRecentVersionWithDebug(
projectId,
projectHistoryId,
- (error, baseVersion, projectStructureAndDocVersions) => {
+ (
+ error,
+ baseVersion,
+ projectStructureAndDocVersions,
+ _lastChange,
+ mostRecentChunk
+ ) => {
if (projectStructureAndDocVersions == null) {
projectStructureAndDocVersions = { project: null, docs: {} }
}
@@ -363,6 +569,8 @@ export function _processUpdates(
if (error != null) {
return callback(error)
}
+
+ let resyncNeeded = false
async.waterfall(
[
cb => {
@@ -370,6 +578,7 @@ export function _processUpdates(
SyncManager.expandSyncUpdates(
projectId,
projectHistoryId,
+ mostRecentChunk,
filteredUpdates,
extendLock,
cb
@@ -387,17 +596,17 @@ export function _processUpdates(
return cb(err)
}
profile.log('skipAlreadyAppliedUpdates')
- const compressedUpdates =
- UpdateCompressor.compressRawUpdates(unappliedUpdates)
- const timeTaken = profile
- .log('compressRawUpdates')
- .getTimeDelta()
- if (timeTaken >= 1000) {
- logger.debug(
- { projectId, updates: unappliedUpdates, timeTaken },
- 'slow compression of raw updates'
- )
- }
+ cb(null, unappliedUpdates)
+ },
+ (unappliedUpdates, cb) => {
+ UpdateCompressor.compressRawUpdatesWithMetricsCb(
+ unappliedUpdates,
+ projectId,
+ profile,
+ cb
+ )
+ },
+ (compressedUpdates, cb) => {
cb = profile.wrap('createBlobs', cb)
BlobManager.createBlobsForUpdates(
projectId,
@@ -472,7 +681,13 @@ export function _processUpdates(
projectHistoryId,
changes,
baseVersion,
- cb
+ (err, response) => {
+ if (err) {
+ return cb(err)
+ }
+ resyncNeeded = response.resyncNeeded
+ cb()
+ }
)
})
},
@@ -483,7 +698,11 @@ export function _processUpdates(
],
error => {
profile.end()
- callback(error)
+ if (error) {
+ callback(error)
+ } else {
+ callback(null, { resyncNeeded })
+ }
}
)
}
@@ -617,5 +836,10 @@ function _sanitizeUpdate(update) {
}
export const promises = {
+ /** @type {(projectId: string) => Promise} */
processUpdatesForProject: promisify(processUpdatesForProject),
+ /** @type {(projectId: string, opts: any) => Promise} */
+ startResyncAndProcessUpdatesUnderLock: promisify(
+ startResyncAndProcessUpdatesUnderLock
+ ),
}
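
A minimal sketch (not part of the patch) of how the promisified entry point above might be driven from a maintenance script; the project id is a placeholder, and the fallback decision is left to the caller, mirroring the bulk resync script further down in this diff.

import * as UpdatesProcessor from '../app/js/UpdatesProcessor.js'
import { NeedFullProjectStructureResyncError } from '../app/js/Errors.js'

async function structureOnlyResync(projectId) {
  try {
    // Takes the project-history lock, queues the resync updates and flushes
    // them in one go, so no other flush can interleave with the resync.
    await UpdatesProcessor.promises.startResyncAndProcessUpdatesUnderLock(
      projectId,
      { resyncProjectStructureOnly: true }
    )
    return true
  } catch (err) {
    if (err instanceof NeedFullProjectStructureResyncError) {
      return false // caller can fall back to a full soft resync
    }
    throw err
  }
}
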
diff --git a/services/project-history/app/js/WebApiManager.js b/services/project-history/app/js/WebApiManager.js
index dc1c366892..2697db29c7 100644
--- a/services/project-history/app/js/WebApiManager.js
+++ b/services/project-history/app/js/WebApiManager.js
@@ -39,6 +39,9 @@ async function requestResync(projectId, opts = {}) {
if (opts.historyRangesMigration) {
body.historyRangesMigration = opts.historyRangesMigration
}
+ if (opts.resyncProjectStructureOnly) {
+ body.resyncProjectStructureOnly = opts.resyncProjectStructureOnly
+ }
await fetchNothing(
`${Settings.apis.web.url}/project/${projectId}/history/resync`,
{
diff --git a/services/project-history/app/js/mongo-types.ts b/services/project-history/app/js/mongo-types.ts
new file mode 100644
index 0000000000..9894e653d2
--- /dev/null
+++ b/services/project-history/app/js/mongo-types.ts
@@ -0,0 +1,22 @@
+import { ObjectId } from 'mongodb-legacy'
+
+export type ProjectHistoryFailure = {
+ _id: ObjectId
+ project_id: string
+ attempts: number
+ resyncAttempts: number
+ resyncStartedAt: Date
+ requestCount?: number
+ history: (ErrorRecord | SyncStartRecord)[]
+} & ErrorRecord
+
+type ErrorRecord = {
+ error: string
+ stack: string
+ queueSize: number
+ ts: Date
+}
+
+type SyncStartRecord = {
+ resyncStartedAt: Date
+}
diff --git a/services/project-history/app/js/mongodb.js b/services/project-history/app/js/mongodb.js
index 98fe2a8ffe..d639903ce2 100644
--- a/services/project-history/app/js/mongodb.js
+++ b/services/project-history/app/js/mongodb.js
@@ -3,6 +3,10 @@ import Settings from '@overleaf/settings'
import mongodb from 'mongodb-legacy'
const { MongoClient, ObjectId } = mongodb
+/**
+ * @import { ProjectHistoryFailure } from './mongo-types.ts'
+ */
+
export { ObjectId }
export const mongoClient = new MongoClient(
@@ -16,6 +20,7 @@ Metrics.mongodb.monitor(mongoClient)
export const db = {
deletedProjects: mongoDb.collection('deletedProjects'),
projects: mongoDb.collection('projects'),
+ /** @type {mongodb.Collection<ProjectHistoryFailure>} */
projectHistoryFailures: mongoDb.collection('projectHistoryFailures'),
projectHistoryLabels: mongoDb.collection('projectHistoryLabels'),
projectHistorySyncState: mongoDb.collection('projectHistorySyncState'),
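
Illustrative only: with the @type annotation on projectHistoryFailures above, JSDoc-aware tooling can check reads against the ProjectHistoryFailure shape from mongo-types.ts. The helper below is hypothetical.

import { db } from '../app/js/mongodb.js'

// Resolves to a ProjectHistoryFailure document, or null when the project has
// no recorded failure.
async function getFailureRecord(projectId) {
  return await db.projectHistoryFailures.findOne({ project_id: projectId })
}
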
diff --git a/services/project-history/app/js/types.ts b/services/project-history/app/js/types.ts
index c5f88e66e1..c11b7741e3 100644
--- a/services/project-history/app/js/types.ts
+++ b/services/project-history/app/js/types.ts
@@ -1,5 +1,11 @@
import { HistoryRanges } from '../../../document-updater/app/js/types'
-import { LinkedFileData, RawOrigin } from 'overleaf-editor-core/lib/types'
+import {
+ LinkedFileData,
+ RawEditOperation,
+ RawOrigin,
+ CommentRawData,
+ TrackedChangeRawData,
+} from 'overleaf-editor-core/lib/types'
export type Update =
| TextUpdate
@@ -35,10 +41,20 @@ export type TextUpdate = {
meta: UpdateMeta & {
pathname: string
doc_length: number
+ doc_hash?: string
history_doc_length?: number
}
}
+export type HistoryOTEditOperationUpdate = {
+ doc: string
+ op: RawEditOperation[]
+ v: number
+ meta: UpdateMeta & {
+ pathname: string
+ }
+}
+
export type SetCommentStateUpdate = {
pathname: string
commentId: string
@@ -94,6 +110,9 @@ export type ResyncProjectStructureUpdate = {
meta: {
ts: string
}
+ // optional fields for resyncProjectStructureOnly=true
+ resyncProjectStructureOnly?: boolean
+ _raw: string
}
export type ResyncDocContentUpdate = {
@@ -101,6 +120,10 @@ export type ResyncDocContentUpdate = {
content: string
version: number
ranges?: Ranges
+ historyOTRanges?: {
+ comments: CommentRawData[]
+ trackedChanges: TrackedChangeRawData[]
+ }
resolvedCommentIds?: string[]
}
projectHistoryId: string
@@ -210,9 +233,10 @@ export type Doc = {
export type File = {
file: string
- url: string
+ url?: string
path: string
- _hash: string
+ _hash?: string
+ createdBlob?: boolean
metadata?: LinkedFileData
}
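
For orientation, a hedged example of a resync structure update that exercises the new optional fields: resyncProjectStructureOnly propagated from web, and a file entry carrying createdBlob instead of a filestore url, matching the File changes above and the acceptance tests later in this diff. All ids are placeholders, and the _raw field added above is omitted from this sketch.

const fileId = 'placeholder-file-id'
const fileHash = 'placeholder-file-hash'
const historyId = 'placeholder-history-id'

const update = {
  resyncProjectStructure: {
    docs: [],
    files: [
      // blob already exists in history-v1, so no url is provided
      { file: fileId, path: '/test.png', _hash: fileHash, createdBlob: true },
    ],
  },
  projectHistoryId: historyId,
  meta: { ts: new Date().toISOString() },
  resyncProjectStructureOnly: true,
}
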
diff --git a/services/project-history/buildscript.txt b/services/project-history/buildscript.txt
index 547b81b1c8..dc83e95fe4 100644
--- a/services/project-history/buildscript.txt
+++ b/services/project-history/buildscript.txt
@@ -4,6 +4,6 @@ project-history
--env-add=
--env-pass-through=
--esmock-loader=True
---node-version=20.18.0
+--node-version=22.17.0
--public-repo=False
---script-version=4.5.0
+--script-version=4.7.0
diff --git a/services/project-history/config/settings.defaults.cjs b/services/project-history/config/settings.defaults.cjs
index 15a96a8373..d767cddd96 100644
--- a/services/project-history/config/settings.defaults.cjs
+++ b/services/project-history/config/settings.defaults.cjs
@@ -27,6 +27,7 @@ module.exports = {
url: `http://${process.env.DOCSTORE_HOST || '127.0.0.1'}:3016`,
},
filestore: {
+ enabled: process.env.FILESTORE_ENABLED !== 'false',
url: `http://${process.env.FILESTORE_HOST || '127.0.0.1'}:3009`,
},
web: {
@@ -40,6 +41,9 @@ module.exports = {
10
),
},
+ project_history: {
+ url: `http://${process.env.PROJECT_HISTORY_HOST || '127.0.0.1'}:3054`,
+ },
},
redis: {
lock: {
@@ -102,4 +106,12 @@ module.exports = {
},
maxFileSizeInBytes: 100 * 1024 * 1024, // 100 megabytes
+
+ shortHistoryQueues: (process.env.SHORT_HISTORY_QUEUES || '')
+ .split(',')
+ .filter(s => !!s),
+ estimateCompressionSample: parseInt(
+ process.env.ESTIMATE_COMPRESSION_SAMPLE || '0',
+ 10
+ ),
}
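
A quick illustration of the SHORT_HISTORY_QUEUES parsing added above: an unset or empty variable yields an empty list, so no project gets the short-queue treatment by default. The project ids below are placeholders.

const parseShortHistoryQueues = value => (value || '').split(',').filter(s => !!s)

console.log(parseShortHistoryQueues(undefined))       // []
console.log(parseShortHistoryQueues('proj-a,proj-b')) // [ 'proj-a', 'proj-b' ]
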
diff --git a/services/project-history/docker-compose.ci.yml b/services/project-history/docker-compose.ci.yml
index 332a9710ca..c6ec24a84b 100644
--- a/services/project-history/docker-compose.ci.yml
+++ b/services/project-history/docker-compose.ci.yml
@@ -21,18 +21,22 @@ services:
ELASTIC_SEARCH_DSN: es:9200
REDIS_HOST: redis
QUEUES_REDIS_HOST: redis
+ HISTORY_REDIS_HOST: redis
ANALYTICS_QUEUES_REDIS_HOST: redis
MONGO_HOST: mongo
POSTGRES_HOST: postgres
MOCHA_GREP: ${MOCHA_GREP}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
+ volumes:
+ - ../../bin/shared/wait_for_it:/overleaf/bin/shared/wait_for_it
depends_on:
mongo:
- condition: service_healthy
+ condition: service_started
redis:
condition: service_healthy
user: node
+ entrypoint: /overleaf/bin/shared/wait_for_it mongo:27017 --timeout=0 --
command: npm run test:acceptance
@@ -44,16 +48,21 @@ services:
command: tar -czf /tmp/build/build.tar.gz --exclude=build.tar.gz --exclude-vcs .
user: root
redis:
- image: redis
+ image: redis:7.4.3
healthcheck:
test: ping="$$(redis-cli ping)" && [ "$$ping" = 'PONG' ]
interval: 1s
retries: 20
mongo:
- image: mongo:6.0.13
+ image: mongo:8.0.11
command: --replSet overleaf
- healthcheck:
- test: "mongosh --quiet localhost/test --eval 'quit(db.runCommand({ ping: 1 }).ok ? 0 : 1)'"
- interval: 1s
- retries: 20
+ volumes:
+ - ../../bin/shared/mongodb-init-replica-set.js:/docker-entrypoint-initdb.d/mongodb-init-replica-set.js
+ environment:
+ MONGO_INITDB_DATABASE: sharelatex
+ extra_hosts:
+ # Required when using the automatic database setup for initializing the
+ # replica set. This override is not needed when running the setup after
+ # starting up mongo.
+ - mongo:127.0.0.1
diff --git a/services/project-history/docker-compose.yml b/services/project-history/docker-compose.yml
index 878dffbf64..dd3c6468fe 100644
--- a/services/project-history/docker-compose.yml
+++ b/services/project-history/docker-compose.yml
@@ -6,7 +6,7 @@ version: "2.3"
services:
test_unit:
- image: node:20.18.0
+ image: node:22.17.0
volumes:
- .:/overleaf/services/project-history
- ../../node_modules:/overleaf/node_modules
@@ -14,49 +14,58 @@ services:
working_dir: /overleaf/services/project-history
environment:
MOCHA_GREP: ${MOCHA_GREP}
+ LOG_LEVEL: ${LOG_LEVEL:-}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
command: npm run --silent test:unit
user: node
test_acceptance:
- image: node:20.18.0
+ image: node:22.17.0
volumes:
- .:/overleaf/services/project-history
- ../../node_modules:/overleaf/node_modules
- ../../libraries:/overleaf/libraries
+ - ../../bin/shared/wait_for_it:/overleaf/bin/shared/wait_for_it
working_dir: /overleaf/services/project-history
environment:
ELASTIC_SEARCH_DSN: es:9200
REDIS_HOST: redis
+ HISTORY_REDIS_HOST: redis
QUEUES_REDIS_HOST: redis
ANALYTICS_QUEUES_REDIS_HOST: redis
MONGO_HOST: mongo
POSTGRES_HOST: postgres
MOCHA_GREP: ${MOCHA_GREP}
- LOG_LEVEL: ERROR
+ LOG_LEVEL: ${LOG_LEVEL:-}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
user: node
depends_on:
mongo:
- condition: service_healthy
+ condition: service_started
redis:
condition: service_healthy
+ entrypoint: /overleaf/bin/shared/wait_for_it mongo:27017 --timeout=0 --
command: npm run --silent test:acceptance
redis:
- image: redis
+ image: redis:7.4.3
healthcheck:
test: ping=$$(redis-cli ping) && [ "$$ping" = 'PONG' ]
interval: 1s
retries: 20
mongo:
- image: mongo:6.0.13
+ image: mongo:8.0.11
command: --replSet overleaf
- healthcheck:
- test: "mongosh --quiet localhost/test --eval 'quit(db.runCommand({ ping: 1 }).ok ? 0 : 1)'"
- interval: 1s
- retries: 20
+ volumes:
+ - ../../bin/shared/mongodb-init-replica-set.js:/docker-entrypoint-initdb.d/mongodb-init-replica-set.js
+ environment:
+ MONGO_INITDB_DATABASE: sharelatex
+ extra_hosts:
+ # Required when using the automatic database setup for initializing the
+ # replica set. This override is not needed when running the setup after
+ # starting up mongo.
+ - mongo:127.0.0.1
diff --git a/services/project-history/package.json b/services/project-history/package.json
index 3be1e70f54..4160f36f6f 100644
--- a/services/project-history/package.json
+++ b/services/project-history/package.json
@@ -9,8 +9,8 @@
"test:unit": "npm run test:unit:_run -- --grep=$MOCHA_GREP",
"start": "node app.js",
"nodemon": "node --watch app.js",
- "test:acceptance:_run": "LOG_LEVEL=fatal mocha --loader=esmock --recursive --reporter spec --timeout 15000 --exit $@ test/acceptance/js",
- "test:unit:_run": "LOG_LEVEL=fatal mocha --loader=esmock --recursive --reporter spec $@ test/unit/js",
+ "test:acceptance:_run": "mocha --loader=esmock --recursive --reporter spec --timeout 15000 --exit $@ test/acceptance/js",
+ "test:unit:_run": "mocha --loader=esmock --recursive --reporter spec $@ test/unit/js",
"lint": "eslint --max-warnings 0 --format unix .",
"format": "prettier --list-different $PWD/'**/*.*js'",
"format:fix": "prettier --write $PWD/'**/*.*js'",
@@ -25,24 +25,26 @@
"@overleaf/promise-utils": "*",
"@overleaf/redis-wrapper": "*",
"@overleaf/settings": "*",
+ "@overleaf/stream-utils": "*",
"async": "^3.2.5",
"aws-sdk": "^2.650.0",
"body-parser": "^1.20.3",
"bunyan": "^1.8.15",
"celebrate": "^15.0.3",
- "cli": "^1.0.1",
"diff-match-patch": "overleaf/diff-match-patch#89805f9c671a77a263fc53461acd62aa7498f688",
"esmock": "^2.6.3",
- "express": "^4.21.0",
+ "express": "^4.21.2",
"lodash": "^4.17.20",
+ "minimist": "^1.2.8",
"mongodb-legacy": "6.1.3",
"overleaf-editor-core": "*",
+ "p-queue": "^8.1.0",
"request": "^2.88.2"
},
"devDependencies": {
"chai": "^4.3.6",
"chai-as-promised": "^7.1.1",
- "mocha": "^10.2.0",
+ "mocha": "^11.1.0",
"nock": "^13.5.3",
"sinon": "~9.0.1",
"sinon-chai": "^3.7.0",
diff --git a/services/project-history/scripts/bulk_resync_file_fix_up.mjs b/services/project-history/scripts/bulk_resync_file_fix_up.mjs
new file mode 100644
index 0000000000..10ea18ba00
--- /dev/null
+++ b/services/project-history/scripts/bulk_resync_file_fix_up.mjs
@@ -0,0 +1,328 @@
+// @ts-check
+import Events from 'node:events'
+import { setTimeout } from 'node:timers/promises'
+import readline from 'node:readline'
+import fs from 'node:fs'
+import minimist from 'minimist'
+import { ObjectId } from 'mongodb'
+import { batchedUpdate } from '@overleaf/mongo-utils/batchedUpdate.js'
+import logger from '@overleaf/logger'
+import Metrics from '@overleaf/metrics'
+import OError from '@overleaf/o-error'
+import { promiseMapWithLimit } from '@overleaf/promise-utils'
+import { db, mongoClient } from '../app/js/mongodb.js'
+import * as HistoryStoreManager from '../app/js/HistoryStoreManager.js'
+import * as RedisManager from '../app/js/RedisManager.js'
+import * as SyncManager from '../app/js/SyncManager.js'
+import * as UpdatesProcessor from '../app/js/UpdatesProcessor.js'
+import { NeedFullProjectStructureResyncError } from '../app/js/Errors.js'
+import * as ErrorRecorder from '../app/js/ErrorRecorder.js'
+
+// Silence warning.
+Events.setMaxListeners(20)
+
+// Enable caching for ObjectId.toString()
+ObjectId.cacheHexString = true
+
+const READ_CONCURRENCY = parseInt(process.env.READ_CONCURRENCY || '100', 10)
+const WRITE_CONCURRENCY = parseInt(process.env.WRITE_CONCURRENCY || '10', 10)
+const FLUSH_RETRIES = parseInt(process.env.FLUSH_RETRIES || '20', 10)
+
+// Relevant dates:
+// - 2024-12-19, start of event-hold removal in filestore bucket -> objects older than 24h are (soft-)deleted.
+// - 2024-12-23, copy operation skipped in filestore when cloning project -> objects not created on clone.
+// - 2025-01-24, no more filestore reads allowed in project-history -> no more empty files in history for 404s
+const FILESTORE_SOFT_DELETE_START = new Date('2024-12-19T00:00:00Z')
+const FILESTORE_READ_OFF = new Date('2025-01-24T15:00:00Z')
+
+const argv = minimist(process.argv.slice(2), {
+ string: ['logs', 'log-latency'],
+})
+const LOG_LATENCY = argv['log-latency'] === 'true'
+
+let gracefulShutdownInitiated = false
+
+process.on('SIGINT', handleSignal)
+process.on('SIGTERM', handleSignal)
+
+function handleSignal() {
+ gracefulShutdownInitiated = true
+ console.warn('graceful shutdown initiated, draining queue')
+}
+
+const STATS = {
+ processedLines: 0,
+ success: 0,
+ changed: 0,
+ failure: 0,
+ skipped: 0,
+ checkFailure: 0,
+}
+
+function logStats() {
+ console.log(
+ JSON.stringify({
+ time: new Date(),
+ gracefulShutdownInitiated,
+ ...STATS,
+ })
+ )
+}
+const logInterval = setInterval(logStats, 10_000)
+
+/**
+ * @typedef {Object} FileRef
+ * @property {ObjectId} _id
+ * @property {any} linkedFileData
+ */
+
+/**
+ * @typedef {Object} Folder
+ * @property {Array<Folder>} folders
+ * @property {Array<FileRef>} fileRefs
+ */
+
+/**
+ * @typedef {Object} Project
+ * @property {ObjectId} _id
+ * @property {Date} lastUpdated
+ * @property {Array<Folder>} rootFolder
+ * @property {{history: {id: (number|string)}}} overleaf
+ */
+
+/**
+ * @param {Folder} folder
+ * @return {boolean}
+ */
+function checkFileTreeNeedsResync(folder) {
+ if (!folder) return false
+ if (Array.isArray(folder.fileRefs)) {
+ for (const fileRef of folder.fileRefs) {
+ if (fileRef.linkedFileData) return true
+ if (fileRef._id.getTimestamp() > FILESTORE_SOFT_DELETE_START) return true
+ }
+ }
+ if (Array.isArray(folder.folders)) {
+ for (const child of folder.folders) {
+ if (checkFileTreeNeedsResync(child)) return true
+ }
+ }
+ return false
+}
+
+/**
+ * @param {string} projectId
+ * @param {string} historyId
+ * @return {Promise<Date>}
+ */
+async function getLastEndTimestamp(projectId, historyId) {
+ const raw = await HistoryStoreManager.promises.getMostRecentVersionRaw(
+ projectId,
+ historyId,
+ { readOnly: true }
+ )
+ if (!raw) throw new Error('bug: history not initialized')
+ return raw.endTimestamp
+}
+
+/** @type {Record<string, (project: Project) => Promise<boolean>>} */
+const conditions = {
+ // cheap: in-memory mongo lookup
+ 'updated after filestore soft-delete': async function (project) {
+ return project.lastUpdated > FILESTORE_SOFT_DELETE_START
+ },
+ // cheap: in-memory mongo lookup
+ 'file-tree requires re-sync': async function (project) {
+ return checkFileTreeNeedsResync(project.rootFolder?.[0])
+ },
+ // moderate: GET from Redis
+ 'has pending operations': async function (project) {
+ const n = await RedisManager.promises.countUnprocessedUpdates(
+ project._id.toString()
+ )
+ return n > 0
+ },
+ // expensive: GET from Mongo/Postgres via history-v1 HTTP API call
+ 'has been flushed after filestore soft-delete': async function (project) {
+ // Resyncs started after soft-deleting can trigger 404s and result in empty files.
+ const endTimestamp = await getLastEndTimestamp(
+ project._id.toString(),
+ project.overleaf.history.id.toString()
+ )
+ return endTimestamp > FILESTORE_SOFT_DELETE_START
+ },
+}
+
+/**
+ * @param {Project} project
+ * @return {Promise<{projectId: string, historyId: string} | null>}
+ */
+async function checkProject(project) {
+ if (gracefulShutdownInitiated) return null
+ if (project._id.getTimestamp() > FILESTORE_READ_OFF) {
+ STATS.skipped++ // Project created after all bugs were fixed.
+ return null
+ }
+ const projectId = project._id.toString()
+ const historyId = project.overleaf.history.id.toString()
+ for (const [condition, check] of Object.entries(conditions)) {
+ try {
+ if (await check(project)) return { projectId, historyId }
+ } catch (err) {
+ logger.err({ projectId, condition, err }, 'failed to check project')
+ STATS.checkFailure++
+ return null
+ }
+ }
+ STATS.skipped++
+ return null
+}
+
+/**
+ * @param {string} projectId
+ * @param {string} historyId
+ * @return {Promise<void>}
+ */
+async function processProject(projectId, historyId) {
+ if (gracefulShutdownInitiated) return
+ const t0 = performance.now()
+ try {
+ await tryProcessProject(projectId, historyId)
+ const latency = performance.now() - t0
+ if (LOG_LATENCY) {
+ logger.info({ projectId, historyId, latency }, 'processed project')
+ }
+ STATS.success++
+ } catch (err) {
+ logger.err({ err, projectId, historyId }, 'failed to process project')
+ STATS.failure++
+ }
+}
+
+/**
+ * @param {string} projectId
+ * @return {Promise<void>}
+ */
+async function flushWithRetries(projectId) {
+ for (let attempt = 0; attempt < FLUSH_RETRIES; attempt++) {
+ try {
+ await UpdatesProcessor.promises.processUpdatesForProject(projectId)
+ return
+ } catch (err) {
+ logger.warn(
+ { projectId, err, attempt },
+ 'failed to flush updates, trying again'
+ )
+ if (gracefulShutdownInitiated) throw err
+ }
+ }
+
+ try {
+ await UpdatesProcessor.promises.processUpdatesForProject(projectId)
+ } catch (err) {
+ // @ts-ignore err is Error
+ throw new OError('failed to flush updates', {}, err)
+ }
+}
+
+/**
+ * @param {string} projectId
+ * @param {string} historyId
+ * @return {Promise<void>}
+ */
+async function tryProcessProject(projectId, historyId) {
+ await flushWithRetries(projectId)
+ const start = new Date()
+ let needsFullSync = false
+ try {
+ await UpdatesProcessor.promises.startResyncAndProcessUpdatesUnderLock(
+ projectId,
+ { resyncProjectStructureOnly: true }
+ )
+ } catch (err) {
+ if (err instanceof NeedFullProjectStructureResyncError) {
+ needsFullSync = true
+ } else {
+ throw err
+ }
+ }
+ if (needsFullSync) {
+ logger.warn(
+ { projectId, historyId },
+ 'structure only resync not sufficient, doing full soft resync'
+ )
+ await SyncManager.promises.startResync(projectId, {})
+ await UpdatesProcessor.promises.processUpdatesForProject(projectId)
+ STATS.changed++
+ } else {
+ const after = await getLastEndTimestamp(projectId, historyId)
+ if (after > start) {
+ STATS.changed++
+ }
+ }
+ // Keep db.projectHistorySyncState from growing with every project we resync.
+ // MongoDB collections cannot shrink on their own, so on success purge the
+ // db entry created by this script right away.
+ await SyncManager.promises.clearResyncStateIfAllAfter(projectId, start)
+}
+
+async function processBatch(projects) {
+ const projectIds = (
+ await promiseMapWithLimit(READ_CONCURRENCY, projects, checkProject)
+ ).filter(id => !!id)
+ await promiseMapWithLimit(WRITE_CONCURRENCY, projectIds, ids =>
+ processProject(ids.projectId, ids.historyId)
+ )
+
+ if (gracefulShutdownInitiated) throw new Error('graceful shutdown triggered')
+}
+
+async function processProjectsFromLog() {
+ const rl = readline.createInterface({
+ input: fs.createReadStream(argv.logs),
+ })
+ for await (const line of rl) {
+ if (gracefulShutdownInitiated) break
+ STATS.processedLines++
+ if (!line.startsWith('{')) continue
+ const { projectId, historyId, msg } = JSON.parse(line)
+ if (msg !== 'failed to process project') continue
+ await processProject(projectId, historyId) // does try/catch with logging
+ }
+}
+
+async function main() {
+ if (argv.logs) {
+ await processProjectsFromLog()
+ return
+ }
+ await batchedUpdate(db.projects, {}, processBatch, {
+ _id: 1,
+ lastUpdated: 1,
+ 'overleaf.history': 1,
+ rootFolder: 1,
+ })
+}
+
+try {
+ try {
+ await main()
+ } finally {
+ clearInterval(logInterval)
+ logStats()
+ Metrics.close()
+ await mongoClient.close()
+ // TODO(das7pad): graceful shutdown for redis. Refactor process.exit when done.
+ }
+ console.log('Done.')
+ await setTimeout(1_000)
+ if (STATS.failure) {
+ process.exit(Math.min(STATS.failure, 99))
+ } else {
+ process.exit(0)
+ }
+} catch (err) {
+ logger.err({ err }, 'fatal error')
+ await setTimeout(1_000)
+ process.exit(100)
+}
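
Illustrative only: the --logs mode of the script above re-processes projects from a previous run's JSON log output. Each line must be a JSON object carrying projectId, historyId and the exact msg emitted by processProject's error path; the file name and ids below are placeholders.

import fs from 'node:fs'

// One retryable entry per line; lines with any other msg are skipped.
fs.appendFileSync(
  'failures.log',
  JSON.stringify({
    msg: 'failed to process project',
    projectId: 'placeholder-project-id',
    historyId: 'placeholder-history-id',
  }) + '\n'
)
// then: node scripts/bulk_resync_file_fix_up.mjs --logs=failures.log
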
diff --git a/services/project-history/scripts/debug_translate_updates.js b/services/project-history/scripts/debug_translate_updates.js
index b3fdc31bd9..bb896371ba 100755
--- a/services/project-history/scripts/debug_translate_updates.js
+++ b/services/project-history/scripts/debug_translate_updates.js
@@ -25,6 +25,7 @@ function expandResyncProjectStructure(chunk, update) {
SyncManager.expandSyncUpdates(
projectId,
99999, // dummy history id
+ chunk,
[update],
cb => cb(), // extend lock
(err, result) => {
diff --git a/services/project-history/scripts/flush_old.js b/services/project-history/scripts/flush_old.js
new file mode 100644
index 0000000000..7ac13b757a
--- /dev/null
+++ b/services/project-history/scripts/flush_old.js
@@ -0,0 +1,194 @@
+#!/usr/bin/env node
+
+import Settings from '@overleaf/settings'
+import minimist from 'minimist'
+import logger from '@overleaf/logger'
+import PQueue from 'p-queue'
+import * as RedisManager from '../app/js/RedisManager.js'
+import * as ErrorRecorder from '../app/js/ErrorRecorder.js'
+
+logger.logger.level('fatal')
+
+function usage() {
+ console.log(`
+Usage: flush_old.js [options]
+
+Options:
+ -b, --batch-size Number of projects to process in each batch (default: 100)
+ -a, --max-age Maximum age of projects to keep (default: 3600)
+ -i, --interval Interval to spread the processing over (default: 300)
+ -c, --concurrency Number of concurrent jobs (default: 10)
+ -u, --buffer Buffer time in seconds to reserve at end (default: 15)
+ -n, --dry-run Show what would be done without making changes
+ -h, --help Show this help message
+
+Examples:
+ # Flush projects older than 24 hours with 5 concurrent jobs
+ flush_old.js --batch-size 100 --max-age 86400 -c 5
+
+ # Dry run to see what would be flushed
+ flush_old.js --max-age 3600 --dry-run
+`)
+ process.exit(0)
+}
+
+const argv = minimist(process.argv.slice(2), {
+ boolean: ['dry-run', 'help'],
+ alias: {
+ b: 'batch-size',
+ a: 'max-age',
+ i: 'interval',
+ c: 'concurrency',
+ n: 'dry-run',
+ u: 'buffer',
+ h: 'help',
+ },
+ default: {
+ 'batch-size': 100,
+ 'max-age': 3600,
+ interval: 300,
+ concurrency: 10,
+ 'dry-run': false,
+ buffer: 15,
+ help: false,
+ },
+})
+
+if (argv.help || process.argv.length === 2) {
+ usage()
+}
+
+const batchSize = parseInt(argv['batch-size'], 10)
+const maxAge = argv['max-age'] ? parseInt(argv['max-age'], 10) : null
+const interval = parseInt(argv.interval, 10) || 300
+const concurrency = parseInt(argv.concurrency, 10) || 10
+const bufferTime = parseInt(argv.buffer, 10) || 15
+const dryRun = argv['dry-run']
+
+/**
+ * Generator function that yields batches of items from an array
+ * @param {Array} array - The array to batch
+ * @param {number} size - The size of each batch
+ * @yields {Array} A batch of items
+ */
+function* getBatches(array, size) {
+ for (let i = 0; i < array.length; i += size) {
+ yield array.slice(i, i + size)
+ }
+}
+
+let flushCount = 0
+
+async function flushProject({ projectId, timestamp }) {
+ const url = `${Settings.apis.project_history.url}/project/${projectId}/flush`
+ if (dryRun) {
+ console.log(`[DRY RUN] would flush project ${projectId}`)
+ return
+ }
+ const response = await fetch(url, {
+ method: 'POST',
+ })
+ flushCount++
+ if (flushCount % 100 === 0) {
+ console.log('flushed', flushCount, 'projects, up to', timestamp)
+ }
+ if (!response.ok) {
+ throw new Error(`failed to flush project ${projectId}`)
+ }
+}
+
+const SCRIPT_START_TIME = Date.now() // script start time in milliseconds since the epoch
+
+function olderThan(maxAge, timestamp) {
+ const age = (SCRIPT_START_TIME - timestamp) / 1000
+ return age > maxAge
+}
+
+async function main() {
+ const projectIds = await RedisManager.promises.getProjectIdsWithHistoryOps()
+ const failedProjects = await ErrorRecorder.promises.getFailedProjects()
+ const failedProjectIds = new Set(failedProjects.map(p => p.project_id))
+
+ const projectIdsToProcess = projectIds.filter(p => !failedProjectIds.has(p))
+ console.log('number of projects with history ops', projectIds.length)
+ console.log(
+ 'number of failed projects to exclude',
+ projectIds.length - projectIdsToProcess.length
+ )
+ const collectedProjects = []
+ let nullCount = 0
+ // iterate over the project ids in batches, using a redis MGET to retrieve the first op timestamps
+ for (const batch of getBatches(projectIdsToProcess, batchSize)) {
+ const timestamps = await RedisManager.promises.getFirstOpTimestamps(batch)
+ const newProjects = batch
+ .map((projectId, idx) => {
+ return { projectId, timestamp: timestamps[idx] }
+ })
+ .filter(({ projectId, timestamp }) => {
+ if (!timestamp) {
+ nullCount++
+ return true // Unknown age
+ }
+ if (olderThan(maxAge, timestamp)) return true // Older than threshold
+ if (Settings.shortHistoryQueues.includes(projectId)) return true // Short queue
+ return false // Do not flush
+ })
+ collectedProjects.push(...newProjects)
+ }
+ // sort the collected projects by ascending timestamp
+ collectedProjects.sort((a, b) => a.timestamp - b.timestamp)
+
+ console.log('number of projects to flush', collectedProjects.length)
+ console.log('number with null timestamps', nullCount)
+
+ const elapsedTime = Math.floor((Date.now() - SCRIPT_START_TIME) / 1000)
+ console.log('elapsed time', elapsedTime, 'seconds, buffer time', bufferTime)
+ const remainingTime = Math.max(interval - elapsedTime - bufferTime, 0)
+ console.log('remaining time', remainingTime, 'seconds')
+
+ const jobsPerSecond = Math.max(
+ Math.ceil(collectedProjects.length / Math.max(remainingTime, 60)),
+ 1
+ )
+ console.log('interval', interval, 'seconds')
+ console.log('jobs per second', jobsPerSecond)
+ console.log('concurrency', concurrency)
+
+ const queue = new PQueue({
+ concurrency,
+ interval: 1000,
+ intervalCap: jobsPerSecond,
+ })
+
+ const taskFns = collectedProjects.map(project => {
+ return async () => {
+ try {
+ await flushProject(project)
+ return { status: 'fulfilled', value: project }
+ } catch (error) {
+ return { status: 'rejected', reason: error, project }
+ }
+ }
+ })
+
+ const results = await queue.addAll(taskFns)
+
+ console.log(
+ 'finished after',
+ Math.floor((Date.now() - SCRIPT_START_TIME) / 1000),
+ 'seconds'
+ )
+ // count the number of successful and failed flushes
+ const success = results.filter(r => r.status === 'fulfilled').length
+ const failed = results.filter(r => r.status === 'rejected').length
+ console.log('completed', { success, failed })
+}
+
+main()
+ .then(() => {
+ process.exit(0)
+ })
+ .catch(err => {
+ console.error(err)
+ process.exit(1)
+ })
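
A minimal sketch of the rate-limiting approach used in flush_old.js above: p-queue bounds in-flight jobs via concurrency, and bounds how many jobs may start per interval via intervalCap, which is how the script spreads its flush requests over the remaining time window. The numbers here are illustrative.

import PQueue from 'p-queue'

const queue = new PQueue({ concurrency: 10, interval: 1000, intervalCap: 5 })

// 20 trivial tasks; at most 5 start per second, at most 10 run at once.
const tasks = Array.from({ length: 20 }, (_, i) => async () => i)
const results = await queue.addAll(tasks) // resolves once all 20 have finished
console.log(results.length) // 20
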
diff --git a/services/project-history/scripts/retry_failures.js b/services/project-history/scripts/retry_failures.js
new file mode 100755
index 0000000000..85ee21faf4
--- /dev/null
+++ b/services/project-history/scripts/retry_failures.js
@@ -0,0 +1,26 @@
+import * as RetryManager from '../app/js/RetryManager.js'
+import minimist from 'minimist'
+
+const args = minimist(process.argv.slice(2), {
+ string: ['failureType', 'timeout', 'limit'],
+ default: {
+ failureType: 'soft',
+ timeout: (60 * 60 * 1000).toString(),
+ limit: (100_000).toString(),
+ },
+})
+
+const failureType = args.failureType
+const timeout = parseInt(args.timeout, 10)
+const limit = parseInt(args.limit, 10)
+
+RetryManager.retryFailures({ failureType, timeout, limit }, (err, result) => {
+ if (err) {
+ console.error(err)
+ process.exit(1)
+ } else {
+ console.log(JSON.stringify(result))
+ console.log('Done.')
+ }
+ process.exit(0)
+})
diff --git a/services/project-history/test/acceptance/js/FlushManagerTests.js b/services/project-history/test/acceptance/js/FlushManagerTests.js
index d11346d9a3..8d4432d3ef 100644
--- a/services/project-history/test/acceptance/js/FlushManagerTests.js
+++ b/services/project-history/test/acceptance/js/FlushManagerTests.js
@@ -6,6 +6,7 @@ import assert from 'node:assert'
import mongodb from 'mongodb-legacy'
import * as ProjectHistoryClient from './helpers/ProjectHistoryClient.js'
import * as ProjectHistoryApp from './helpers/ProjectHistoryApp.js'
+import Settings from '@overleaf/settings'
const { ObjectId } = mongodb
const MockHistoryStore = () => nock('http://127.0.0.1:3100')
@@ -127,7 +128,7 @@ describe('Flushing old queues', function () {
'made calls to history service to store updates in the background'
)
done()
- }, 100)
+ }, 1_000)
}
)
})
@@ -183,6 +184,88 @@ describe('Flushing old queues', function () {
})
})
+ describe('when the update is newer than the cutoff and project has short queue', function () {
+ beforeEach(function () {
+ Settings.shortHistoryQueues.push(this.projectId)
+ })
+ afterEach(function () {
+ Settings.shortHistoryQueues.length = 0
+ })
+ beforeEach(function (done) {
+ this.flushCall = MockHistoryStore()
+ .put(
+ `/api/projects/${historyId}/blobs/0a207c060e61f3b88eaee0a8cd0696f46fb155eb`
+ )
+ .reply(201)
+ .post(`/api/projects/${historyId}/legacy_changes?end_version=0`)
+ .reply(200)
+ const update = {
+ pathname: '/main.tex',
+ docLines: 'a\nb',
+ doc: this.docId,
+ meta: { user_id: this.user_id, ts: new Date() },
+ }
+ async.series(
+ [
+ cb =>
+ ProjectHistoryClient.pushRawUpdate(this.projectId, update, cb),
+ cb =>
+ ProjectHistoryClient.setFirstOpTimestamp(
+ this.projectId,
+ Date.now() - 60 * 1000,
+ cb
+ ),
+ ],
+ done
+ )
+ })
+
+ it('flushes the project history queue', function (done) {
+ request.post(
+ {
+ url: `http://127.0.0.1:3054/flush/old?maxAge=${3 * 3600}`,
+ },
+ (error, res, body) => {
+ if (error) {
+ return done(error)
+ }
+ expect(res.statusCode).to.equal(200)
+ assert(
+ this.flushCall.isDone(),
+ 'made calls to history service to store updates'
+ )
+ done()
+ }
+ )
+ })
+
+ it('flushes the project history queue in the background when requested', function (done) {
+ request.post(
+ {
+ url: `http://127.0.0.1:3054/flush/old?maxAge=${3 * 3600}&background=1`,
+ },
+ (error, res, body) => {
+ if (error) {
+ return done(error)
+ }
+ expect(res.statusCode).to.equal(200)
+ expect(body).to.equal('{"message":"running flush in background"}')
+ assert(
+ !this.flushCall.isDone(),
+ 'did not make calls to history service to store updates in the foreground'
+ )
+ setTimeout(() => {
+ assert(
+ this.flushCall.isDone(),
+ 'made calls to history service to store updates in the background'
+ )
+ done()
+ }, 1_000)
+ }
+ )
+ })
+ })
+
describe('when the update does not have a timestamp', function () {
beforeEach(function (done) {
this.flushCall = MockHistoryStore()
diff --git a/services/project-history/test/acceptance/js/GetChangesSince.js b/services/project-history/test/acceptance/js/GetChangesSince.js
deleted file mode 100644
index 559432fc73..0000000000
--- a/services/project-history/test/acceptance/js/GetChangesSince.js
+++ /dev/null
@@ -1,122 +0,0 @@
-import { expect } from 'chai'
-import mongodb from 'mongodb-legacy'
-import nock from 'nock'
-import Core from 'overleaf-editor-core'
-import * as ProjectHistoryClient from './helpers/ProjectHistoryClient.js'
-import * as ProjectHistoryApp from './helpers/ProjectHistoryApp.js'
-import latestChunk from '../fixtures/chunks/7-8.json' with { type: 'json' }
-import previousChunk from '../fixtures/chunks/4-6.json' with { type: 'json' }
-import firstChunk from '../fixtures/chunks/0-3.json' with { type: 'json' }
-const { ObjectId } = mongodb
-
-const MockHistoryStore = () => nock('http://127.0.0.1:3100')
-const MockWeb = () => nock('http://127.0.0.1:3000')
-
-const fixture = path => new URL(`../fixtures/${path}`, import.meta.url)
-
-describe('GetChangesSince', function () {
- let projectId, historyId
- beforeEach(function (done) {
- projectId = new ObjectId().toString()
- historyId = new ObjectId().toString()
- ProjectHistoryApp.ensureRunning(error => {
- if (error) throw error
-
- MockHistoryStore().post('/api/projects').reply(200, {
- projectId: historyId,
- })
-
- ProjectHistoryClient.initializeProject(historyId, (error, olProject) => {
- if (error) throw error
- MockWeb()
- .get(`/project/${projectId}/details`)
- .reply(200, {
- name: 'Test Project',
- overleaf: { history: { id: olProject.id } },
- })
-
- MockHistoryStore()
- .get(`/api/projects/${historyId}/latest/history`)
- .replyWithFile(200, fixture('chunks/7-8.json'))
- MockHistoryStore()
- .get(`/api/projects/${historyId}/versions/6/history`)
- .replyWithFile(200, fixture('chunks/4-6.json'))
- MockHistoryStore()
- .get(`/api/projects/${historyId}/versions/3/history`)
- .replyWithFile(200, fixture('chunks/0-3.json'))
-
- done()
- })
- })
- })
-
- afterEach(function () {
- nock.cleanAll()
- })
-
- function expectChangesSince(version, changes, done) {
- ProjectHistoryClient.getChangesSince(
- projectId,
- version,
- {},
- (error, got) => {
- if (error) throw error
- expect(got.map(c => Core.Change.fromRaw(c))).to.deep.equal(
- changes.map(c => Core.Change.fromRaw(c))
- )
- done()
- }
- )
- }
-
- it('should return zero changes since the latest version', function (done) {
- expectChangesSince(8, [], done)
- })
-
- it('should return one change when behind one version', function (done) {
- expectChangesSince(7, [latestChunk.chunk.history.changes[1]], done)
- })
-
- it('should return changes when at the chunk boundary', function (done) {
- expect(latestChunk.chunk.startVersion).to.equal(6)
- expectChangesSince(6, latestChunk.chunk.history.changes, done)
- })
-
- it('should return changes spanning multiple chunks', function (done) {
- expectChangesSince(
- 1,
- [
- ...firstChunk.chunk.history.changes.slice(1),
- ...previousChunk.chunk.history.changes,
- ...latestChunk.chunk.history.changes,
- ],
- done
- )
- })
-
- it('should return all changes when going back to the beginning', function (done) {
- expectChangesSince(
- 0,
- [
- ...firstChunk.chunk.history.changes,
- ...previousChunk.chunk.history.changes,
- ...latestChunk.chunk.history.changes,
- ],
- done
- )
- })
-
- it('should return an error when past the end version', function (done) {
- ProjectHistoryClient.getChangesSince(
- projectId,
- 9,
- { allowErrors: true },
- (error, body, statusCode) => {
- if (error) throw error
- expect(statusCode).to.equal(500)
- expect(body).to.deep.equal({ message: 'an internal error occurred' })
- done()
- }
- )
- })
-})
diff --git a/services/project-history/test/acceptance/js/SyncTests.js b/services/project-history/test/acceptance/js/SyncTests.js
index 27db3434b4..f7420e6cdb 100644
--- a/services/project-history/test/acceptance/js/SyncTests.js
+++ b/services/project-history/test/acceptance/js/SyncTests.js
@@ -4,8 +4,12 @@ import { expect } from 'chai'
import request from 'request'
import assert from 'node:assert'
import mongodb from 'mongodb-legacy'
+import logger from '@overleaf/logger'
+import Settings from '@overleaf/settings'
import * as ProjectHistoryClient from './helpers/ProjectHistoryClient.js'
import * as ProjectHistoryApp from './helpers/ProjectHistoryApp.js'
+import sinon from 'sinon'
+import { getFailure } from './helpers/ProjectHistoryClient.js'
const { ObjectId } = mongodb
const EMPTY_FILE_HASH = 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391'
@@ -16,6 +20,15 @@ const MockWeb = () => nock('http://127.0.0.1:3000')
describe('Syncing with web and doc-updater', function () {
const historyId = new ObjectId().toString()
+ let loggerWarn, loggerError
+ beforeEach(function () {
+ loggerWarn = sinon.spy(logger, 'warn')
+ loggerError = sinon.spy(logger, 'error')
+ })
+ afterEach(function () {
+ loggerWarn.restore()
+ loggerError.restore()
+ })
beforeEach(function (done) {
this.timestamp = new Date()
@@ -41,16 +54,6 @@ describe('Syncing with web and doc-updater', function () {
},
},
})
- MockHistoryStore()
- .get(`/api/projects/${historyId}/latest/history`)
- .reply(200, {
- chunk: {
- startVersion: 0,
- history: {
- changes: [],
- },
- },
- })
ProjectHistoryClient.initializeProject(historyId, done)
})
})
@@ -202,7 +205,113 @@ describe('Syncing with web and doc-updater', function () {
MockFileStore()
.get(`/project/${this.project_id}/file/${this.file_id}`)
.reply(200, fileContents)
+ const headBlob = MockHistoryStore()
+ .head(`/api/projects/${historyId}/blobs/${fileHash}`)
+ .reply(404)
+ const createBlob = MockHistoryStore()
+ .put(`/api/projects/${historyId}/blobs/${fileHash}`, fileContents)
+ .reply(201)
+ const addFile = MockHistoryStore()
+ .post(`/api/projects/${historyId}/legacy_changes`, body => {
+ expect(body).to.deep.equal([
+ {
+ v2Authors: [],
+ authors: [],
+ timestamp: this.timestamp.toJSON(),
+ operations: [
+ {
+ pathname: 'test.png',
+ file: {
+ hash: fileHash,
+ },
+ },
+ ],
+ origin: { kind: 'test-origin' },
+ },
+ ])
+ return true
+ })
+ .query({ end_version: 0 })
+ .reply(204)
+
+ async.series(
+ [
+ cb => {
+ ProjectHistoryClient.resyncHistory(this.project_id, cb)
+ },
+ cb => {
+ const update = {
+ projectHistoryId: historyId,
+ resyncProjectStructure: {
+ docs: [],
+ files: [
+ {
+ file: this.file_id,
+ path: '/test.png',
+ _hash: fileHash,
+ url: `http://127.0.0.1:3009/project/${this.project_id}/file/${this.file_id}`,
+ },
+ { path: '/persistedFile' },
+ ],
+ },
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
+ },
+ cb => {
+ ProjectHistoryClient.flushProject(this.project_id, cb)
+ },
+ ],
+ error => {
+ if (error) {
+ throw error
+ }
+ assert(!loggerWarn.called, 'no warning logged on 404')
+ assert(
+ headBlob.isDone(),
+ 'HEAD /api/projects/:historyId/blobs/:hash should have been called'
+ )
+ assert(
+ createBlob.isDone(),
+ '/api/projects/:historyId/blobs/:hash should have been called'
+ )
+ assert(
+ addFile.isDone(),
+ `/api/projects/${historyId}/changes should have been called`
+ )
+ done()
+ }
+ )
+ })
+ it('should skip HEAD on blob without hash', function (done) {
+ MockHistoryStore()
+ .get(`/api/projects/${historyId}/latest/history`)
+ .reply(200, {
+ chunk: {
+ history: {
+ snapshot: {
+ files: {
+ persistedFile: { hash: EMPTY_FILE_HASH, byteLength: 0 },
+ },
+ },
+ changes: [],
+ },
+ startVersion: 0,
+ },
+ })
+
+ const fileContents = Buffer.from([1, 2, 3])
+ const fileHash = 'aed2973e4b8a7ff1b30ff5c4751e5a2b38989e74'
+
+ MockFileStore()
+ .get(`/project/${this.project_id}/file/${this.file_id}`)
+ .reply(200, fileContents)
+ const headBlob = MockHistoryStore()
+ .head(`/api/projects/${historyId}/blobs/undefined`)
+ .reply(500)
const createBlob = MockHistoryStore()
.put(`/api/projects/${historyId}/blobs/${fileHash}`, fileContents)
.reply(201)
@@ -263,6 +372,11 @@ describe('Syncing with web and doc-updater', function () {
if (error) {
throw error
}
+ assert(!loggerWarn.called, 'no warning logged on 404')
+ assert(
+ !headBlob.isDone(),
+ 'HEAD /api/projects/:historyId/blobs/:hash should have been skipped'
+ )
assert(
createBlob.isDone(),
'/api/projects/:historyId/blobs/:hash should have been called'
@@ -275,6 +389,588 @@ describe('Syncing with web and doc-updater', function () {
}
)
})
+ it('should record error when checking blob fails with 500', function (done) {
+ MockHistoryStore()
+ .get(`/api/projects/${historyId}/latest/history`)
+ .reply(200, {
+ chunk: {
+ history: {
+ snapshot: {
+ files: {
+ persistedFile: { hash: EMPTY_FILE_HASH, byteLength: 0 },
+ },
+ },
+ changes: [],
+ },
+ startVersion: 0,
+ },
+ })
+
+ const fileContents = Buffer.from([1, 2, 3])
+ const fileHash = 'aed2973e4b8a7ff1b30ff5c4751e5a2b38989e74'
+
+ MockFileStore()
+ .get(`/project/${this.project_id}/file/${this.file_id}`)
+ .reply(200, fileContents)
+ const headBlob = MockHistoryStore()
+ .head(`/api/projects/${historyId}/blobs/${fileHash}`)
+ .reply(500)
+ const createBlob = MockHistoryStore()
+ .put(`/api/projects/${historyId}/blobs/${fileHash}`, fileContents)
+ .reply(201)
+
+ const addFile = MockHistoryStore()
+ .post(`/api/projects/${historyId}/legacy_changes`, body => {
+ expect(body).to.deep.equal([
+ {
+ v2Authors: [],
+ authors: [],
+ timestamp: this.timestamp.toJSON(),
+ operations: [
+ {
+ pathname: 'test.png',
+ file: {
+ hash: fileHash,
+ },
+ },
+ ],
+ origin: { kind: 'test-origin' },
+ },
+ ])
+ return true
+ })
+ .query({ end_version: 0 })
+ .reply(204)
+
+ async.series(
+ [
+ cb => {
+ ProjectHistoryClient.resyncHistory(this.project_id, cb)
+ },
+ cb => {
+ const update = {
+ projectHistoryId: historyId,
+ resyncProjectStructure: {
+ docs: [],
+ files: [
+ {
+ file: this.file_id,
+ path: '/test.png',
+ _hash: fileHash,
+ url: `http://127.0.0.1:3009/project/${this.project_id}/file/${this.file_id}`,
+ },
+ { path: '/persistedFile' },
+ ],
+ },
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
+ },
+ cb => {
+ ProjectHistoryClient.flushProject(
+ this.project_id,
+ {
+ allowErrors: true,
+ },
+ (err, res) => {
+ if (err) return cb(err)
+ assert(res.statusCode === 500, 'resync should have failed')
+ cb()
+ }
+ )
+ },
+ ],
+ error => {
+ if (error) {
+ throw error
+ }
+ assert(
+ loggerError.calledWithMatch(
+ sinon.match.any,
+ 'error checking whether blob exists'
+ ),
+ 'error logged on 500'
+ )
+ assert(
+ headBlob.isDone(),
+ 'HEAD /api/projects/:historyId/blobs/:hash should have been called'
+ )
+ assert(
+ !createBlob.isDone(),
+ '/api/projects/:historyId/blobs/:hash should have been skipped'
+ )
+ assert(
+ !addFile.isDone(),
+ `/api/projects/${historyId}/changes should have been skipped`
+ )
+ done()
+ }
+ )
+ })
+ it('should skip blob write when blob exists', function (done) {
+ MockHistoryStore()
+ .get(`/api/projects/${historyId}/latest/history`)
+ .reply(200, {
+ chunk: {
+ history: {
+ snapshot: {
+ files: {
+ persistedFile: { hash: EMPTY_FILE_HASH, byteLength: 0 },
+ },
+ },
+ changes: [],
+ },
+ startVersion: 0,
+ },
+ })
+
+ const fileContents = Buffer.from([1, 2, 3])
+ const fileHash = 'aed2973e4b8a7ff1b30ff5c4751e5a2b38989e74'
+
+ MockFileStore()
+ .get(`/project/${this.project_id}/file/${this.file_id}`)
+ .reply(200, fileContents)
+ const headBlob = MockHistoryStore()
+ .head(`/api/projects/${historyId}/blobs/${fileHash}`)
+ .reply(200)
+ const createBlob = MockHistoryStore()
+ .put(`/api/projects/${historyId}/blobs/${fileHash}`, fileContents)
+ .reply(201)
+
+ const addFile = MockHistoryStore()
+ .post(`/api/projects/${historyId}/legacy_changes`, body => {
+ expect(body).to.deep.equal([
+ {
+ v2Authors: [],
+ authors: [],
+ timestamp: this.timestamp.toJSON(),
+ operations: [
+ {
+ pathname: 'test.png',
+ file: {
+ hash: fileHash,
+ },
+ },
+ ],
+ origin: { kind: 'test-origin' },
+ },
+ ])
+ return true
+ })
+ .query({ end_version: 0 })
+ .reply(204)
+
+ async.series(
+ [
+ cb => {
+ ProjectHistoryClient.resyncHistory(this.project_id, cb)
+ },
+ cb => {
+ const update = {
+ projectHistoryId: historyId,
+ resyncProjectStructure: {
+ docs: [],
+ files: [
+ {
+ file: this.file_id,
+ path: '/test.png',
+ _hash: fileHash,
+ url: `http://127.0.0.1:3009/project/${this.project_id}/file/${this.file_id}`,
+ },
+ { path: '/persistedFile' },
+ ],
+ },
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
+ },
+ cb => {
+ ProjectHistoryClient.flushProject(this.project_id, cb)
+ },
+ ],
+ error => {
+ if (error) {
+ throw error
+ }
+ assert(!loggerWarn.called, 'no warning logged on 404')
+ assert(
+ headBlob.isDone(),
+ 'HEAD /api/projects/:historyId/blobs/:hash should have been called'
+ )
+ assert(
+ !createBlob.isDone(),
+ '/api/projects/:historyId/blobs/:hash should have been skipped'
+ )
+ assert(
+ addFile.isDone(),
+ `/api/projects/${historyId}/changes should have been called`
+ )
+ done()
+ }
+ )
+ })
+ it('should add file w/o url', function (done) {
+ MockHistoryStore()
+ .get(`/api/projects/${historyId}/latest/history`)
+ .reply(200, {
+ chunk: {
+ history: {
+ snapshot: {
+ files: {
+ persistedFile: { hash: EMPTY_FILE_HASH, byteLength: 0 },
+ },
+ },
+ changes: [],
+ },
+ startVersion: 0,
+ },
+ })
+
+ const fileContents = Buffer.from([1, 2, 3])
+ const fileHash = 'aed2973e4b8a7ff1b30ff5c4751e5a2b38989e74'
+
+ MockFileStore()
+ .get(`/project/${this.project_id}/file/${this.file_id}`)
+ .reply(200, fileContents)
+ const headBlob = MockHistoryStore()
+ .head(`/api/projects/${historyId}/blobs/${fileHash}`)
+ .reply(200)
+ const createBlob = MockHistoryStore()
+ .put(`/api/projects/${historyId}/blobs/${fileHash}`, fileContents)
+ .reply(201)
+
+ const addFile = MockHistoryStore()
+ .post(`/api/projects/${historyId}/legacy_changes`, body => {
+ expect(body).to.deep.equal([
+ {
+ v2Authors: [],
+ authors: [],
+ timestamp: this.timestamp.toJSON(),
+ operations: [
+ {
+ pathname: 'test.png',
+ file: {
+ hash: fileHash,
+ },
+ },
+ ],
+ origin: { kind: 'test-origin' },
+ },
+ ])
+ return true
+ })
+ .query({ end_version: 0 })
+ .reply(204)
+
+ async.series(
+ [
+ cb => {
+ ProjectHistoryClient.resyncHistory(this.project_id, cb)
+ },
+ cb => {
+ const update = {
+ projectHistoryId: historyId,
+ resyncProjectStructure: {
+ docs: [],
+ files: [
+ {
+ file: this.file_id,
+ path: '/test.png',
+ _hash: fileHash,
+ createdBlob: true,
+ },
+ { path: '/persistedFile' },
+ ],
+ },
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
+ },
+ cb => {
+ ProjectHistoryClient.flushProject(this.project_id, cb)
+ },
+ ],
+ error => {
+ if (error) {
+ throw error
+ }
+ assert(!loggerWarn.called, 'no warning logged on 404')
+ assert(
+ headBlob.isDone(),
+ 'HEAD /api/projects/:historyId/blobs/:hash should have been called'
+ )
+ assert(
+ !createBlob.isDone(),
+ '/api/projects/:historyId/blobs/:hash should have been skipped'
+ )
+ assert(
+ addFile.isDone(),
+ `/api/projects/${historyId}/changes should have been called`
+ )
+ done()
+ }
+ )
+ })
+ describe('with filestore disabled', function () {
+ before(function () {
+ Settings.apis.filestore.enabled = false
+ })
+ after(function () {
+ Settings.apis.filestore.enabled = true
+ })
+ it('should record error when blob is missing', function (done) {
+ MockHistoryStore()
+ .get(`/api/projects/${historyId}/latest/history`)
+ .reply(200, {
+ chunk: {
+ history: {
+ snapshot: {
+ files: {
+ persistedFile: { hash: EMPTY_FILE_HASH, byteLength: 0 },
+ },
+ },
+ changes: [],
+ },
+ startVersion: 0,
+ },
+ })
+
+ const fileContents = Buffer.from([1, 2, 3])
+ const fileHash = 'aed2973e4b8a7ff1b30ff5c4751e5a2b38989e74'
+
+ MockFileStore()
+ .get(`/project/${this.project_id}/file/${this.file_id}`)
+ .reply(200, fileContents)
+ const headBlob = MockHistoryStore()
+ .head(`/api/projects/${historyId}/blobs/${fileHash}`)
+ .times(3) // three retries
+ .reply(404)
+ const createBlob = MockHistoryStore()
+ .put(`/api/projects/${historyId}/blobs/${fileHash}`, fileContents)
+ .reply(201)
+
+ const addFile = MockHistoryStore()
+ .post(`/api/projects/${historyId}/legacy_changes`, body => {
+ expect(body).to.deep.equal([
+ {
+ v2Authors: [],
+ authors: [],
+ timestamp: this.timestamp.toJSON(),
+ operations: [
+ {
+ pathname: 'test.png',
+ file: {
+ hash: fileHash,
+ },
+ },
+ ],
+ origin: { kind: 'test-origin' },
+ },
+ ])
+ return true
+ })
+ .query({ end_version: 0 })
+ .reply(204)
+
+ async.series(
+ [
+ cb => {
+ ProjectHistoryClient.resyncHistory(this.project_id, cb)
+ },
+ cb => {
+ const update = {
+ projectHistoryId: historyId,
+ resyncProjectStructure: {
+ docs: [],
+ files: [
+ {
+ file: this.file_id,
+ path: '/test.png',
+ _hash: fileHash,
+ url: `http://127.0.0.1:3009/project/${this.project_id}/file/${this.file_id}`,
+ },
+ { path: '/persistedFile' },
+ ],
+ },
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(
+ this.project_id,
+ update,
+ cb
+ )
+ },
+ cb => {
+ ProjectHistoryClient.flushProject(
+ this.project_id,
+ {
+ allowErrors: true,
+ },
+ (err, res) => {
+ if (err) return cb(err)
+ assert(
+ res.statusCode === 500,
+ 'resync should have failed'
+ )
+ cb()
+ }
+ )
+ },
+ ],
+ error => {
+ if (error) {
+ throw error
+ }
+ assert(
+ loggerError.calledWithMatch(
+ sinon.match.any,
+ 'blocking filestore read'
+ ),
+ 'error logged on 500'
+ )
+ assert(
+ headBlob.isDone(),
+ 'HEAD /api/projects/:historyId/blobs/:hash should have been called'
+ )
+ assert(
+ !createBlob.isDone(),
+ '/api/projects/:historyId/blobs/:hash should have been skipped'
+ )
+ assert(
+ !addFile.isDone(),
+ `/api/projects/${historyId}/changes should have been skipped`
+ )
+ done()
+ }
+ )
+ })
+ })
+ })
+
+ describe('when a file hash mismatches', function () {
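+ // the snapshot already has test.png with the empty-file hash, but the resync reports a different hash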
+ it('should remove and re-add file w/o url', function (done) {
+ MockHistoryStore()
+ .get(`/api/projects/${historyId}/latest/history`)
+ .reply(200, {
+ chunk: {
+ history: {
+ snapshot: {
+ files: {
+ 'test.png': { hash: EMPTY_FILE_HASH, byteLength: 0 },
+ },
+ },
+ changes: [],
+ },
+ startVersion: 0,
+ },
+ })
+
+ const fileContents = Buffer.from([1, 2, 3])
+ const fileHash = 'aed2973e4b8a7ff1b30ff5c4751e5a2b38989e74'
+
+ MockFileStore()
+ .get(`/project/${this.project_id}/file/${this.file_id}`)
+ .reply(200, fileContents)
+ const headBlob = MockHistoryStore()
+ .head(`/api/projects/${historyId}/blobs/${fileHash}`)
+ .reply(200)
+ const createBlob = MockHistoryStore()
+ .put(`/api/projects/${historyId}/blobs/${fileHash}`, fileContents)
+ .reply(201)
+
+ const addFile = MockHistoryStore()
+ .post(`/api/projects/${historyId}/legacy_changes`, body => {
+ expect(body).to.deep.equal([
+ {
+ v2Authors: [],
+ authors: [],
+ timestamp: this.timestamp.toJSON(),
+ operations: [
+ {
+ pathname: 'test.png',
+ newPathname: '',
+ },
+ ],
+ origin: { kind: 'test-origin' },
+ },
+ {
+ v2Authors: [],
+ authors: [],
+ timestamp: this.timestamp.toJSON(),
+ operations: [
+ {
+ pathname: 'test.png',
+ file: {
+ hash: fileHash,
+ },
+ },
+ ],
+ origin: { kind: 'test-origin' },
+ },
+ ])
+ return true
+ })
+ .query({ end_version: 0 })
+ .reply(204)
+
+ async.series(
+ [
+ cb => {
+ ProjectHistoryClient.resyncHistory(this.project_id, cb)
+ },
+ cb => {
+ const update = {
+ projectHistoryId: historyId,
+ resyncProjectStructure: {
+ docs: [],
+ files: [
+ {
+ file: this.file_id,
+ path: '/test.png',
+ _hash: fileHash,
+ createdBlob: true,
+ },
+ ],
+ },
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
+ },
+ cb => {
+ ProjectHistoryClient.flushProject(this.project_id, cb)
+ },
+ ],
+ error => {
+ if (error) {
+ throw error
+ }
+ assert(!loggerWarn.called, 'no warning logged on 404')
+ assert(
+ headBlob.isDone(),
+ 'HEAD /api/projects/:historyId/blobs/:hash should have been called'
+ )
+ assert(
+ !createBlob.isDone(),
+ '/api/projects/:historyId/blobs/:hash should have been skipped'
+ )
+ assert(
+ addFile.isDone(),
+ `/api/projects/${historyId}/changes should have been called`
+ )
+ done()
+ }
+ )
+ })
})
describe("when a file exists which shouldn't", function () {
@@ -529,7 +1225,7 @@ describe('Syncing with web and doc-updater', function () {
)
})
- it('should fix comments in the history store', function (done) {
+ it('should add comments in the history store', function (done) {
const commentId = 'comment-id'
const addComment = MockHistoryStore()
.post(`/api/projects/${historyId}/legacy_changes`, body => {
@@ -619,6 +1315,1417 @@ describe('Syncing with web and doc-updater', function () {
}
)
})
+
+ it('should add comments in the history store (history-ot)', function (done) {
+ const commentId = 'comment-id'
+ const addComment = MockHistoryStore()
+ .post(`/api/projects/${historyId}/legacy_changes`, body => {
+ expect(body).to.deep.equal([
+ {
+ v2Authors: [],
+ authors: [],
+ timestamp: this.timestamp.toJSON(),
+ operations: [
+ {
+ pathname: 'main.tex',
+ commentId,
+ ranges: [{ pos: 1, length: 10 }],
+ },
+ ],
+ origin: { kind: 'test-origin' },
+ },
+ ])
+ return true
+ })
+ .query({ end_version: 0 })
+ .reply(204)
+
+ async.series(
+ [
+ cb => {
+ ProjectHistoryClient.resyncHistory(this.project_id, cb)
+ },
+ cb => {
+ const update = {
+ projectHistoryId: historyId,
+ resyncProjectStructure: {
+ docs: [{ path: '/main.tex' }],
+ files: [],
+ },
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
+ },
+ cb => {
+ const update = {
+ path: '/main.tex',
+ projectHistoryId: historyId,
+ resyncDocContent: {
+ content: 'a\nb',
+ historyOTRanges: {
+ comments: [
+ {
+ id: commentId,
+ ranges: [
+ {
+ pos: 1,
+ length: 10,
+ },
+ ],
+ },
+ ],
+ },
+ },
+ doc: this.doc_id,
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
+ },
+ cb => {
+ ProjectHistoryClient.flushProject(this.project_id, cb)
+ },
+ ],
+ error => {
+ if (error) {
+ return done(error)
+ }
+ assert(
+ addComment.isDone(),
+ `/api/projects/${historyId}/changes should have been called`
+ )
+ done()
+ }
+ )
+ })
+
+ it('should add tracked changes in the history store', function (done) {
+ const fixTrackedChange = MockHistoryStore()
+ .post(`/api/projects/${historyId}/legacy_changes`, body => {
+ expect(body).to.deep.equal([
+ {
+ v2Authors: [],
+ authors: [],
+ timestamp: this.timestamp.toJSON(),
+ operations: [
+ {
+ pathname: 'main.tex',
+ textOperation: [
+ {
+ r: 1,
+ tracking: {
+ ts: this.timestamp.toJSON(),
+ type: 'delete',
+ userId: 'user-id',
+ },
+ },
+ {
+ r: 1,
+ tracking: {
+ ts: this.timestamp.toJSON(),
+ type: 'insert',
+ userId: 'user-id',
+ },
+ },
+ 1,
+ ],
+ },
+ ],
+ origin: { kind: 'test-origin' },
+ },
+ ])
+ return true
+ })
+ .query({ end_version: 0 })
+ .reply(204)
+
+ async.series(
+ [
+ cb => {
+ ProjectHistoryClient.resyncHistory(this.project_id, cb)
+ },
+ cb => {
+ const update = {
+ projectHistoryId: historyId,
+ resyncProjectStructure: {
+ docs: [{ path: '/main.tex' }],
+ files: [],
+ },
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
+ },
+ cb => {
+ const update = {
+ path: '/main.tex',
+ projectHistoryId: historyId,
+ resyncDocContent: {
+ content: 'a\nb',
+ ranges: {
+ changes: [
+ {
+ id: 'id1',
+ op: {
+ d: 'a',
+ p: 0,
+ },
+ metadata: {
+ user_id: 'user-id',
+ ts: this.timestamp,
+ },
+ },
+ {
+ id: 'id2',
+ op: {
+ i: '\n',
+ p: 0,
+ hpos: 1,
+ },
+ metadata: {
+ user_id: 'user-id',
+ ts: this.timestamp,
+ },
+ },
+ ],
+ },
+ },
+ doc: this.doc_id,
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
+ },
+ cb => {
+ ProjectHistoryClient.flushProject(this.project_id, cb)
+ },
+ ],
+ error => {
+ if (error) {
+ return done(error)
+ }
+ assert(
+ fixTrackedChange.isDone(),
+ `/api/projects/${historyId}/changes should have been called`
+ )
+ done()
+ }
+ )
+ })
+
+ it('should add tracked changes in the history store (history-ot)', function (done) {
+ const fixTrackedChange = MockHistoryStore()
+ .post(`/api/projects/${historyId}/legacy_changes`, body => {
+ expect(body).to.deep.equal([
+ {
+ v2Authors: [],
+ authors: [],
+ timestamp: this.timestamp.toJSON(),
+ operations: [
+ {
+ pathname: 'main.tex',
+ textOperation: [
+ {
+ r: 1,
+ tracking: {
+ ts: this.timestamp.toJSON(),
+ type: 'delete',
+ userId: 'user-id',
+ },
+ },
+ {
+ r: 1,
+ tracking: {
+ ts: this.timestamp.toJSON(),
+ type: 'insert',
+ userId: 'user-id',
+ },
+ },
+ 1,
+ ],
+ },
+ ],
+ origin: { kind: 'test-origin' },
+ },
+ ])
+ return true
+ })
+ .query({ end_version: 0 })
+ .reply(204)
+
+ async.series(
+ [
+ cb => {
+ ProjectHistoryClient.resyncHistory(this.project_id, cb)
+ },
+ cb => {
+ const update = {
+ projectHistoryId: historyId,
+ resyncProjectStructure: {
+ docs: [{ path: '/main.tex' }],
+ files: [],
+ },
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
+ },
+ cb => {
+ const update = {
+ path: '/main.tex',
+ projectHistoryId: historyId,
+ resyncDocContent: {
+ content: 'a\nb',
+ historyOTRanges: {
+ trackedChanges: [
+ {
+ range: { pos: 0, length: 1 },
+ tracking: {
+ ts: this.timestamp.toJSON(),
+ type: 'delete',
+ userId: 'user-id',
+ },
+ },
+ {
+ range: { pos: 1, length: 1 },
+ tracking: {
+ ts: this.timestamp.toJSON(),
+ type: 'insert',
+ userId: 'user-id',
+ },
+ },
+ ],
+ },
+ },
+ doc: this.doc_id,
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
+ },
+ cb => {
+ ProjectHistoryClient.flushProject(this.project_id, cb)
+ },
+ ],
+ error => {
+ if (error) {
+ return done(error)
+ }
+ assert(
+ fixTrackedChange.isDone(),
+ `/api/projects/${historyId}/changes should have been called`
+ )
+ done()
+ }
+ )
+ })
+ })
+
+ describe("when a doc's ranges are out of sync", function () {
+ const commentId = 'comment-id'
+ beforeEach(function () {
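+ // seed the history store with a snapshot whose ranges blob holds the current comment and tracked-change ranges for the resync to compare against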
+ MockHistoryStore()
+ .get(`/api/projects/${historyId}/latest/history`)
+ .reply(200, {
+ chunk: {
+ history: {
+ snapshot: {
+ files: {
+ 'main.tex': {
+ hash: '0a207c060e61f3b88eaee0a8cd0696f46fb155eb',
+ rangesHash: '0a207c060e61f3b88eaee0a8cd0696f46fb155ec',
+ stringLength: 3,
+ },
+ },
+ },
+ changes: [],
+ },
+ startVersion: 0,
+ },
+ })
+
+ MockHistoryStore()
+ .get(
+ `/api/projects/${historyId}/blobs/0a207c060e61f3b88eaee0a8cd0696f46fb155eb`
+ )
+ .reply(200, 'a\nb')
+
+ MockHistoryStore()
+ .get(
+ `/api/projects/${historyId}/blobs/0a207c060e61f3b88eaee0a8cd0696f46fb155ec`
+ )
+ .reply(
+ 200,
+ JSON.stringify({
+ comments: [{ id: commentId, ranges: [{ pos: 0, length: 3 }] }],
+ trackedChanges: [
+ {
+ range: { pos: 0, length: 1 },
+ tracking: {
+ ts: this.timestamp.toJSON(),
+ type: 'delete',
+ userId: 'user-id',
+ },
+ },
+ {
+ range: { pos: 2, length: 1 },
+ tracking: {
+ ts: this.timestamp.toJSON(),
+ type: 'insert',
+ userId: 'user-id',
+ },
+ },
+ ],
+ })
+ )
+ })
+
+ it('should fix comments in the history store', function (done) {
+ const addComment = MockHistoryStore()
+ .post(`/api/projects/${historyId}/legacy_changes`, body => {
+ expect(body).to.deep.equal([
+ {
+ v2Authors: [],
+ authors: [],
+ timestamp: this.timestamp.toJSON(),
+ operations: [
+ {
+ pathname: 'main.tex',
+ commentId,
+ ranges: [{ pos: 1, length: 2 }],
+ },
+ ],
+ origin: { kind: 'test-origin' },
+ },
+ ])
+ return true
+ })
+ .query({ end_version: 0 })
+ .reply(204)
+
+ async.series(
+ [
+ cb => {
+ ProjectHistoryClient.resyncHistory(this.project_id, cb)
+ },
+ cb => {
+ const update = {
+ projectHistoryId: historyId,
+ resyncProjectStructure: {
+ docs: [{ path: '/main.tex' }],
+ files: [],
+ },
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
+ },
+ cb => {
+ const update = {
+ path: '/main.tex',
+ projectHistoryId: historyId,
+ resyncDocContent: {
+ content: 'a\nb',
+ ranges: {
+ comments: [
+ {
+ id: commentId,
+ op: {
+ c: 'a',
+ p: 0,
+ hpos: 1,
+ hlen: 2,
+ t: commentId,
+ },
+ meta: {
+ user_id: 'user-id',
+ ts: this.timestamp,
+ },
+ },
+ ],
+ changes: [
+ {
+ id: 'id1',
+ op: {
+ d: 'a',
+ p: 0,
+ },
+ metadata: {
+ user_id: 'user-id',
+ ts: this.timestamp,
+ },
+ },
+ {
+ id: 'id2',
+ op: {
+ i: '\n',
+ p: 1,
+ hpos: 2,
+ },
+ metadata: {
+ user_id: 'user-id',
+ ts: this.timestamp,
+ },
+ },
+ ],
+ },
+ },
+ doc: this.doc_id,
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
+ },
+ cb => {
+ ProjectHistoryClient.flushProject(this.project_id, cb)
+ },
+ ],
+ error => {
+ if (error) {
+ return done(error)
+ }
+ assert(
+ addComment.isDone(),
+ `/api/projects/${historyId}/changes should have been called`
+ )
+ done()
+ }
+ )
+ })
+
+ it('should fix resolved state for comments in the history store', function (done) {
+ const addComment = MockHistoryStore()
+ .post(`/api/projects/${historyId}/legacy_changes`, body => {
+ expect(body).to.deep.equal([
+ {
+ v2Authors: [],
+ authors: [],
+ timestamp: this.timestamp.toJSON(),
+ operations: [
+ {
+ pathname: 'main.tex',
+ commentId,
+ resolved: true,
+ },
+ ],
+ origin: { kind: 'test-origin' },
+ },
+ ])
+ return true
+ })
+ .query({ end_version: 0 })
+ .reply(204)
+
+ async.series(
+ [
+ cb => {
+ ProjectHistoryClient.resyncHistory(this.project_id, cb)
+ },
+ cb => {
+ const update = {
+ projectHistoryId: historyId,
+ resyncProjectStructure: {
+ docs: [{ path: '/main.tex' }],
+ files: [],
+ },
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
+ },
+ cb => {
+ const update = {
+ path: '/main.tex',
+ projectHistoryId: historyId,
+ resyncDocContent: {
+ content: 'a\nb',
+ resolvedCommentIds: [commentId],
+ ranges: {
+ comments: [
+ {
+ id: commentId,
+ op: {
+ c: 'a',
+ p: 0,
+ hpos: 0,
+ hlen: 3,
+ t: commentId,
+ },
+ meta: {
+ user_id: 'user-id',
+ ts: this.timestamp,
+ },
+ },
+ ],
+ changes: [
+ {
+ id: 'id1',
+ op: {
+ d: 'a',
+ p: 0,
+ },
+ metadata: {
+ user_id: 'user-id',
+ ts: this.timestamp,
+ },
+ },
+ {
+ id: 'id2',
+ op: {
+ i: '\n',
+ p: 1,
+ hpos: 2,
+ },
+ metadata: {
+ user_id: 'user-id',
+ ts: this.timestamp,
+ },
+ },
+ ],
+ },
+ },
+ doc: this.doc_id,
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
+ },
+ cb => {
+ ProjectHistoryClient.flushProject(this.project_id, cb)
+ },
+ ],
+ error => {
+ if (error) {
+ return done(error)
+ }
+ assert(
+ addComment.isDone(),
+ `/api/projects/${historyId}/changes should have been called`
+ )
+ done()
+ }
+ )
+ })
+
+ it('should fix comments in the history store (history-ot)', function (done) {
+ const addComment = MockHistoryStore()
+ .post(`/api/projects/${historyId}/legacy_changes`, body => {
+ expect(body).to.deep.equal([
+ {
+ v2Authors: [],
+ authors: [],
+ timestamp: this.timestamp.toJSON(),
+ operations: [
+ {
+ pathname: 'main.tex',
+ commentId,
+ ranges: [{ pos: 1, length: 2 }],
+ },
+ ],
+ origin: { kind: 'test-origin' },
+ },
+ ])
+ return true
+ })
+ .query({ end_version: 0 })
+ .reply(204)
+
+ async.series(
+ [
+ cb => {
+ ProjectHistoryClient.resyncHistory(this.project_id, cb)
+ },
+ cb => {
+ const update = {
+ projectHistoryId: historyId,
+ resyncProjectStructure: {
+ docs: [{ path: '/main.tex' }],
+ files: [],
+ },
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
+ },
+ cb => {
+ const update = {
+ path: '/main.tex',
+ projectHistoryId: historyId,
+ resyncDocContent: {
+ content: 'a\nb',
+ historyOTRanges: {
+ comments: [
+ {
+ id: commentId,
+ ranges: [
+ {
+ pos: 1,
+ length: 2,
+ },
+ ],
+ },
+ ],
+ trackedChanges: [
+ {
+ range: { pos: 0, length: 1 },
+ tracking: {
+ ts: this.timestamp.toJSON(),
+ type: 'delete',
+ userId: 'user-id',
+ },
+ },
+ {
+ range: { pos: 2, length: 1 },
+ tracking: {
+ ts: this.timestamp.toJSON(),
+ type: 'insert',
+ userId: 'user-id',
+ },
+ },
+ ],
+ },
+ },
+ doc: this.doc_id,
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
+ },
+ cb => {
+ ProjectHistoryClient.flushProject(this.project_id, cb)
+ },
+ ],
+ error => {
+ if (error) {
+ return done(error)
+ }
+ assert(
+ addComment.isDone(),
+ `/api/projects/${historyId}/changes should have been called`
+ )
+ done()
+ }
+ )
+ })
+
+ it('should fix resolved state for comments in the history store (history-ot)', function (done) {
+ const addComment = MockHistoryStore()
+ .post(`/api/projects/${historyId}/legacy_changes`, body => {
+ expect(body).to.deep.equal([
+ {
+ v2Authors: [],
+ authors: [],
+ timestamp: this.timestamp.toJSON(),
+ operations: [
+ {
+ pathname: 'main.tex',
+ commentId,
+ resolved: true,
+ },
+ ],
+ origin: { kind: 'test-origin' },
+ },
+ ])
+ return true
+ })
+ .query({ end_version: 0 })
+ .reply(204)
+
+ async.series(
+ [
+ cb => {
+ ProjectHistoryClient.resyncHistory(this.project_id, cb)
+ },
+ cb => {
+ const update = {
+ projectHistoryId: historyId,
+ resyncProjectStructure: {
+ docs: [{ path: '/main.tex' }],
+ files: [],
+ },
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
+ },
+ cb => {
+ const update = {
+ path: '/main.tex',
+ projectHistoryId: historyId,
+ resyncDocContent: {
+ content: 'a\nb',
+ historyOTRanges: {
+ comments: [
+ {
+ id: commentId,
+ ranges: [
+ {
+ pos: 0,
+ length: 3,
+ },
+ ],
+ resolved: true,
+ },
+ ],
+ trackedChanges: [
+ {
+ range: { pos: 0, length: 1 },
+ tracking: {
+ ts: this.timestamp.toJSON(),
+ type: 'delete',
+ userId: 'user-id',
+ },
+ },
+ {
+ range: { pos: 2, length: 1 },
+ tracking: {
+ ts: this.timestamp.toJSON(),
+ type: 'insert',
+ userId: 'user-id',
+ },
+ },
+ ],
+ },
+ },
+ doc: this.doc_id,
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
+ },
+ cb => {
+ ProjectHistoryClient.flushProject(this.project_id, cb)
+ },
+ ],
+ error => {
+ if (error) {
+ return done(error)
+ }
+ assert(
+ addComment.isDone(),
+ `/api/projects/${historyId}/changes should have been called`
+ )
+ done()
+ }
+ )
+ })
+
+ it('should fix tracked changes in the history store', function (done) {
+ const fixTrackedChange = MockHistoryStore()
+ .post(`/api/projects/${historyId}/legacy_changes`, body => {
+ expect(body).to.deep.equal([
+ {
+ v2Authors: [],
+ authors: [],
+ timestamp: this.timestamp.toJSON(),
+ operations: [
+ {
+ pathname: 'main.tex',
+ textOperation: [
+ 1,
+ {
+ r: 1,
+ tracking: {
+ ts: this.timestamp.toJSON(),
+ type: 'insert',
+ userId: 'user-id',
+ },
+ },
+ {
+ r: 1,
+ tracking: {
+ type: 'none',
+ },
+ },
+ ],
+ },
+ ],
+ origin: { kind: 'test-origin' },
+ },
+ ])
+ return true
+ })
+ .query({ end_version: 0 })
+ .reply(204)
+
+ async.series(
+ [
+ cb => {
+ ProjectHistoryClient.resyncHistory(this.project_id, cb)
+ },
+ cb => {
+ const update = {
+ projectHistoryId: historyId,
+ resyncProjectStructure: {
+ docs: [{ path: '/main.tex' }],
+ files: [],
+ },
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
+ },
+ cb => {
+ const update = {
+ path: '/main.tex',
+ projectHistoryId: historyId,
+ resyncDocContent: {
+ content: 'a\nb',
+ ranges: {
+ comments: [
+ {
+ id: commentId,
+ op: {
+ c: 'a',
+ p: 0,
+ hpos: 0,
+ hlen: 3,
+ t: commentId,
+ },
+ meta: {
+ user_id: 'user-id',
+ ts: this.timestamp,
+ },
+ },
+ ],
+ changes: [
+ {
+ id: 'id1',
+ op: {
+ d: 'a',
+ p: 0,
+ },
+ metadata: {
+ user_id: 'user-id',
+ ts: this.timestamp,
+ },
+ },
+ {
+ id: 'id2',
+ op: {
+ i: '\n',
+ p: 0,
+ hpos: 1,
+ },
+ metadata: {
+ user_id: 'user-id',
+ ts: this.timestamp,
+ },
+ },
+ ],
+ },
+ },
+ doc: this.doc_id,
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
+ },
+ cb => {
+ ProjectHistoryClient.flushProject(this.project_id, cb)
+ },
+ ],
+ error => {
+ if (error) {
+ return done(error)
+ }
+ assert(
+ fixTrackedChange.isDone(),
+ `/api/projects/${historyId}/changes should have been called`
+ )
+ done()
+ }
+ )
+ })
+
+ it('should fix tracked changes in the history store (history-ot)', function (done) {
+ const fixTrackedChange = MockHistoryStore()
+ .post(`/api/projects/${historyId}/legacy_changes`, body => {
+ expect(body).to.deep.equal([
+ {
+ v2Authors: [],
+ authors: [],
+ timestamp: this.timestamp.toJSON(),
+ operations: [
+ {
+ pathname: 'main.tex',
+ textOperation: [
+ 1,
+ {
+ r: 1,
+ tracking: {
+ ts: this.timestamp.toJSON(),
+ type: 'insert',
+ userId: 'user-id',
+ },
+ },
+ {
+ r: 1,
+ tracking: {
+ type: 'none',
+ },
+ },
+ ],
+ },
+ ],
+ origin: { kind: 'test-origin' },
+ },
+ ])
+ return true
+ })
+ .query({ end_version: 0 })
+ .reply(204)
+
+ async.series(
+ [
+ cb => {
+ ProjectHistoryClient.resyncHistory(this.project_id, cb)
+ },
+ cb => {
+ const update = {
+ projectHistoryId: historyId,
+ resyncProjectStructure: {
+ docs: [{ path: '/main.tex' }],
+ files: [],
+ },
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
+ },
+ cb => {
+ const update = {
+ path: '/main.tex',
+ projectHistoryId: historyId,
+ resyncDocContent: {
+ content: 'a\nb',
+ historyOTRanges: {
+ comments: [
+ {
+ id: commentId,
+ ranges: [
+ {
+ pos: 0,
+ length: 3,
+ },
+ ],
+ },
+ ],
+ trackedChanges: [
+ {
+ range: { pos: 0, length: 1 },
+ tracking: {
+ ts: this.timestamp.toJSON(),
+ type: 'delete',
+ userId: 'user-id',
+ },
+ },
+ {
+ range: { pos: 1, length: 1 },
+ tracking: {
+ ts: this.timestamp.toJSON(),
+ type: 'insert',
+ userId: 'user-id',
+ },
+ },
+ ],
+ },
+ },
+ doc: this.doc_id,
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
+ },
+ cb => {
+ ProjectHistoryClient.flushProject(this.project_id, cb)
+ },
+ ],
+ error => {
+ if (error) {
+ return done(error)
+ }
+ assert(
+ fixTrackedChange.isDone(),
+ `/api/projects/${historyId}/changes should have been called`
+ )
+ done()
+ }
+ )
+ })
+
+ it('should fix both comments and tracked changes in the history store (history-ot)', function (done) {
+ const fixTrackedChange = MockHistoryStore()
+ .post(`/api/projects/${historyId}/legacy_changes`, body => {
+ expect(body).to.deep.equal([
+ // not merged: the comment operation uses history-ot while the tracked-changes operation uses sharejs OT
+ {
+ v2Authors: [],
+ authors: [],
+ timestamp: this.timestamp.toJSON(),
+ operations: [
+ {
+ pathname: 'main.tex',
+ commentId,
+ ranges: [{ pos: 1, length: 2 }],
+ },
+ ],
+ origin: { kind: 'test-origin' },
+ },
+ {
+ v2Authors: [],
+ authors: [],
+ timestamp: this.timestamp.toJSON(),
+ operations: [
+ {
+ pathname: 'main.tex',
+ textOperation: [
+ 1,
+ {
+ r: 1,
+ tracking: {
+ ts: this.timestamp.toJSON(),
+ type: 'insert',
+ userId: 'user-id',
+ },
+ },
+ {
+ r: 1,
+ tracking: {
+ type: 'none',
+ },
+ },
+ ],
+ },
+ ],
+ origin: { kind: 'test-origin' },
+ },
+ ])
+ return true
+ })
+ .query({ end_version: 0 })
+ .reply(204)
+
+ async.series(
+ [
+ cb => {
+ ProjectHistoryClient.resyncHistory(this.project_id, cb)
+ },
+ cb => {
+ const update = {
+ projectHistoryId: historyId,
+ resyncProjectStructure: {
+ docs: [{ path: '/main.tex' }],
+ files: [],
+ },
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
+ },
+ cb => {
+ const update = {
+ path: '/main.tex',
+ projectHistoryId: historyId,
+ resyncDocContent: {
+ content: 'a\nb',
+ historyOTRanges: {
+ comments: [
+ {
+ id: commentId,
+ ranges: [
+ {
+ pos: 1,
+ length: 2,
+ },
+ ],
+ },
+ ],
+ trackedChanges: [
+ {
+ range: { pos: 0, length: 1 },
+ tracking: {
+ ts: this.timestamp.toJSON(),
+ type: 'delete',
+ userId: 'user-id',
+ },
+ },
+ {
+ range: { pos: 1, length: 1 },
+ tracking: {
+ ts: this.timestamp.toJSON(),
+ type: 'insert',
+ userId: 'user-id',
+ },
+ },
+ ],
+ },
+ },
+ doc: this.doc_id,
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
+ },
+ cb => {
+ ProjectHistoryClient.flushProject(this.project_id, cb)
+ },
+ ],
+ error => {
+ if (error) {
+ return done(error)
+ }
+ assert(
+ fixTrackedChange.isDone(),
+ `/api/projects/${historyId}/changes should have been called`
+ )
+ done()
+ }
+ )
+ })
+ })
+
+ describe('resyncProjectStructureOnly', function () {
+ it('should handle structure-only updates', function (done) {
+ const fileHash = 'aed2973e4b8a7ff1b30ff5c4751e5a2b38989e74'
+
+ MockHistoryStore()
+ .get(`/api/projects/${historyId}/latest/history`)
+ .reply(200, {
+ chunk: {
+ history: {
+ snapshot: {
+ files: {
+ 'main.tex': {
+ hash: '0a207c060e61f3b88eaee0a8cd0696f46fb155eb',
+ stringLength: 3,
+ },
+ },
+ },
+ changes: [],
+ },
+ startVersion: 0,
+ },
+ })
+
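+ // request for the doc content blob, which a structure-only resync must not make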
+ const docContentRequest = MockHistoryStore()
+ .get(
+ `/api/projects/${historyId}/blobs/0a207c060e61f3b88eaee0a8cd0696f46fb155eb`
+ )
+ .reply(200, 'a\nb')
+ MockHistoryStore()
+ .head(`/api/projects/${historyId}/blobs/${fileHash}`)
+ .reply(200)
+ const addFile = MockHistoryStore()
+ .post(`/api/projects/${historyId}/legacy_changes`, body => {
+ expect(body).to.deep.equal([
+ {
+ v2Authors: [],
+ authors: [],
+ timestamp: this.timestamp.toJSON(),
+ operations: [
+ {
+ pathname: 'test.png',
+ file: {
+ hash: fileHash,
+ },
+ },
+ ],
+ origin: { kind: 'test-origin' },
+ },
+ ])
+ return true
+ })
+ .query({ end_version: 0 })
+ .reply(204)
+
+ // allow a 2nd resync
+ MockWeb()
+ .post(`/project/${this.project_id}/history/resync`)
+ .reply(204)
+
+ async.series(
+ [
+ cb => {
+ ProjectHistoryClient.resyncHistory(this.project_id, cb)
+ },
+ cb => {
+ const update = {
+ projectHistoryId: historyId,
+ resyncProjectStructureOnly: true,
+ resyncProjectStructure: {
+ docs: [{ path: '/main.tex' }],
+ files: [
+ {
+ file: this.file_id,
+ path: '/test.png',
+ _hash: fileHash,
+ createdBlob: true,
+ },
+ ],
+ },
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
+ },
+ cb => {
+ ProjectHistoryClient.flushProject(this.project_id, cb)
+ },
+ cb => {
+ // would fail if the previous resync had not finished
+ ProjectHistoryClient.resyncHistory(this.project_id, cb)
+ },
+ ],
+ error => {
+ if (error) {
+ throw error
+ }
+ assert(
+ addFile.isDone(),
+ `/api/projects/${historyId}/changes should have been called`
+ )
+ assert(
+ !docContentRequest.isDone(),
+ 'should not have requested doc content'
+ )
+ done()
+ }
+ )
+ })
+ it('should reject partial resync on docs', function (done) {
+ const fileHash = 'aed2973e4b8a7ff1b30ff5c4751e5a2b38989e74'
+
+ MockHistoryStore()
+ .get(`/api/projects/${historyId}/latest/history`)
+ .reply(200, {
+ chunk: {
+ history: {
+ snapshot: {
+ files: {
+ 'main.tex': {
+ hash: '0a207c060e61f3b88eaee0a8cd0696f46fb155eb',
+ stringLength: 3,
+ },
+ },
+ },
+ changes: [],
+ },
+ startVersion: 0,
+ },
+ })
+
+ const docContentRequest = MockHistoryStore()
+ .get(
+ `/api/projects/${historyId}/blobs/0a207c060e61f3b88eaee0a8cd0696f46fb155eb`
+ )
+ .reply(200, 'a\nb')
+ MockHistoryStore()
+ .head(`/api/projects/${historyId}/blobs/${fileHash}`)
+ .reply(200)
+ const addFile = MockHistoryStore()
+ .post(`/api/projects/${historyId}/legacy_changes`)
+ .query({ end_version: 0 })
+ .reply(204)
+
+ // allow a 2nd resync
+ MockWeb()
+ .post(`/project/${this.project_id}/history/resync`)
+ .reply(204)
+
+ async.series(
+ [
+ cb => {
+ ProjectHistoryClient.resyncHistory(this.project_id, cb)
+ },
+ cb => {
+ const update = {
+ projectHistoryId: historyId,
+ resyncProjectStructureOnly: true,
+ resyncProjectStructure: {
+ docs: [{ path: '/main-renamed.tex' }],
+ files: [
+ {
+ file: this.file_id,
+ path: '/test.png',
+ _hash: fileHash,
+ createdBlob: true,
+ },
+ ],
+ },
+ meta: {
+ ts: this.timestamp,
+ },
+ }
+ ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
+ },
+ cb => {
+ ProjectHistoryClient.flushProject(
+ this.project_id,
+ { allowErrors: true },
+ (err, res) => {
+ if (err) return cb(err)
+ expect(res.statusCode).to.equal(500)
+ expect(loggerError).to.have.been.calledWith(
+ sinon.match({
+ err: {
+ name: 'NeedFullProjectStructureResyncError',
+ message: 'aborting partial resync: touched doc',
+ },
+ })
+ )
+
+ getFailure(this.project_id, (err, failure) => {
+ if (err) return cb(err)
+ expect(failure).to.include({
+ error:
+ 'NeedFullProjectStructureResyncError: aborting partial resync: touched doc',
+ })
+ cb()
+ })
+ }
+ )
+ },
+ cb => {
+ // would fail if the previous resync had not finished
+ ProjectHistoryClient.resyncHistory(this.project_id, cb)
+ },
+ ],
+ error => {
+ if (error) {
+ throw error
+ }
+ assert(!addFile.isDone(), 'should not have persisted changes')
+ assert(
+ !docContentRequest.isDone(),
+ 'should not have requested doc content'
+ )
+ done()
+ }
+ )
+ })
})
})
})
diff --git a/services/project-history/test/acceptance/js/helpers/ProjectHistoryApp.js b/services/project-history/test/acceptance/js/helpers/ProjectHistoryApp.js
index 4fb9993c63..6a81221840 100644
--- a/services/project-history/test/acceptance/js/helpers/ProjectHistoryApp.js
+++ b/services/project-history/test/acceptance/js/helpers/ProjectHistoryApp.js
@@ -9,8 +9,7 @@
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import { app } from '../../../../app/js/server.js'
-import logger from '@overleaf/logger'
-logger.logger.level('error')
+import { mongoClient } from '../../../../app/js/mongodb.js'
let running = false
let initing = false
@@ -31,13 +30,16 @@ export function ensureRunning(callback) {
if (error != null) {
throw error
}
- running = true
- return (() => {
- const result = []
- for (callback of Array.from(callbacks)) {
- result.push(callback())
+
+ // Wait for mongo
+ mongoClient.connect(error => {
+ if (error != null) {
+ throw error
}
- return result
- })()
+ running = true
+ for (callback of Array.from(callbacks)) {
+ callback()
+ }
+ })
})
}
diff --git a/services/project-history/test/acceptance/js/helpers/ProjectHistoryClient.js b/services/project-history/test/acceptance/js/helpers/ProjectHistoryClient.js
index cadc7969a7..92caa4bd0e 100644
--- a/services/project-history/test/acceptance/js/helpers/ProjectHistoryClient.js
+++ b/services/project-history/test/acceptance/js/helpers/ProjectHistoryClient.js
@@ -108,25 +108,6 @@ export function getFileTreeDiff(projectId, from, to, callback) {
)
}
-export function getChangesSince(projectId, since, options, callback) {
- request.get(
- {
- url: `http://127.0.0.1:3054/project/${projectId}/changes`,
- qs: {
- since,
- },
- json: true,
- },
- (error, res, body) => {
- if (error) return callback(error)
- if (!options.allowErrors) {
- expect(res.statusCode).to.equal(200)
- }
- callback(null, body, res.statusCode)
- }
- )
-}
-
export function getChangesInChunkSince(projectId, since, options, callback) {
request.get(
{
@@ -330,6 +311,10 @@ export function setFailure(failureEntry, callback) {
)
}
+export function getFailure(projectId, callback) {
+ db.projectHistoryFailures.findOne({ project_id: projectId }, callback)
+}
+
export function transferLabelOwnership(fromUser, toUser, callback) {
request.post(
{
diff --git a/services/project-history/test/unit/js/ErrorRecorder/ErrorRecorderTest.js b/services/project-history/test/unit/js/ErrorRecorder/ErrorRecorderTest.js
index db6d767e58..79af1a8ce1 100644
--- a/services/project-history/test/unit/js/ErrorRecorder/ErrorRecorderTest.js
+++ b/services/project-history/test/unit/js/ErrorRecorder/ErrorRecorderTest.js
@@ -1,5 +1,4 @@
import sinon from 'sinon'
-import { expect } from 'chai'
import { strict as esmock } from 'esmock'
import tk from 'timekeeper'
@@ -12,7 +11,9 @@ describe('ErrorRecorder', function () {
this.db = {
projectHistoryFailures: {
deleteOne: sinon.stub().resolves(),
- updateOne: sinon.stub().resolves(),
+ findOneAndUpdate: sinon
+ .stub()
+ .resolves({ value: { failure: 'record' } }),
},
}
this.mongodb = { db: this.db }
@@ -31,75 +32,65 @@ describe('ErrorRecorder', function () {
})
describe('record', function () {
- describe('with an error', function () {
- beforeEach(async function () {
- this.error = new Error('something bad')
- await expect(
- this.ErrorRecorder.promises.record(
- this.project_id,
- this.queueSize,
- this.error
- )
- ).to.be.rejected
- })
-
- it('should record the error to mongo', function () {
- this.db.projectHistoryFailures.updateOne
- .calledWithMatch(
- {
- project_id: this.project_id,
- },
- {
- $set: {
- queueSize: this.queueSize,
- error: this.error.toString(),
- stack: this.error.stack,
- ts: this.now,
- },
- $inc: {
- attempts: 1,
- },
- $push: {
- history: {
- $each: [
- {
- queueSize: this.queueSize,
- error: this.error.toString(),
- stack: this.error.stack,
- ts: this.now,
- },
- ],
- $position: 0,
- $slice: 10,
- },
- },
- },
- {
- upsert: true,
- }
- )
- .should.equal(true)
- })
+ beforeEach(async function () {
+ this.error = new Error('something bad')
+ await this.ErrorRecorder.promises.record(
+ this.project_id,
+ this.queueSize,
+ this.error
+ )
})
- describe('without an error', function () {
- beforeEach(async function () {
- this.result = await this.ErrorRecorder.promises.record(
- this.project_id,
- this.queueSize,
- this.error
+ it('should record the error to mongo', function () {
+ this.db.projectHistoryFailures.findOneAndUpdate
+ .calledWithMatch(
+ {
+ project_id: this.project_id,
+ },
+ {
+ $set: {
+ queueSize: this.queueSize,
+ error: this.error.toString(),
+ stack: this.error.stack,
+ ts: this.now,
+ },
+ $inc: {
+ attempts: 1,
+ },
+ $push: {
+ history: {
+ $each: [
+ {
+ queueSize: this.queueSize,
+ error: this.error.toString(),
+ stack: this.error.stack,
+ ts: this.now,
+ },
+ ],
+ $position: 0,
+ $slice: 10,
+ },
+ },
+ },
+ {
+ upsert: true,
+ }
)
- })
+ .should.equal(true)
+ })
+ })
- it('should remove any error from mongo', function () {
- this.db.projectHistoryFailures.deleteOne
- .calledWithMatch({ project_id: this.project_id })
- .should.equal(true)
- })
+ describe('clearError', function () {
+ beforeEach(async function () {
+ this.result = await this.ErrorRecorder.promises.clearError(
+ this.project_id
+ )
+ })
- it('should return the queue size', function () {
- expect(this.result).to.equal(this.queueSize)
- })
+ it('should remove any error from mongo', function () {
+ this.db.projectHistoryFailures.deleteOne
+ .calledWithMatch({ project_id: this.project_id })
+ .should.equal(true)
})
})
})
diff --git a/services/project-history/test/unit/js/HistoryStoreManager/HistoryStoreManagerTests.js b/services/project-history/test/unit/js/HistoryStoreManager/HistoryStoreManagerTests.js
index 4b2545dd43..db5b87d65c 100644
--- a/services/project-history/test/unit/js/HistoryStoreManager/HistoryStoreManagerTests.js
+++ b/services/project-history/test/unit/js/HistoryStoreManager/HistoryStoreManagerTests.js
@@ -22,6 +22,7 @@ describe('HistoryStoreManager', function () {
},
apis: {
filestore: {
+ enabled: true,
url: 'http://filestore.overleaf.production',
},
},
@@ -57,7 +58,6 @@ describe('HistoryStoreManager', function () {
}
this.request = sinon.stub()
- this.request.get = sinon.stub()
this.logger = {
debug: sinon.stub(),
@@ -382,6 +382,9 @@ describe('HistoryStoreManager', function () {
this.fileStream = {}
this.hash = 'random-hash'
this.LocalFileWriter.bufferOnDisk.callsArgWith(4, null, this.hash)
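+ // the blob existence check (HEAD via fetchNothing) returns 404, so createBlobForUpdate has to fetch the file content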
+ this.FetchUtils.fetchNothing.rejects(
+ new RequestFailedError('', {}, { status: 404 })
+ )
this.FetchUtils.fetchStream.resolves(this.fileStream)
})
@@ -422,6 +425,30 @@ describe('HistoryStoreManager', function () {
})
})
+ describe('with filestore disabled', function () {
+ beforeEach(function (done) {
+ this.settings.apis.filestore.enabled = false
+ this.file_id = '012345678901234567890123'
+ this.update = {
+ file: true,
+ url: `http://filestore.other.cloud.provider/project/${this.projectId}/file/${this.file_id}`,
+ hash: this.hash,
+ }
+ this.HistoryStoreManager.createBlobForUpdate(
+ this.projectId,
+ this.historyId,
+ this.update,
+ err => {
+ expect(err).to.match(/blocking filestore read/)
+ done()
+ }
+ )
+ })
+ it('should not request the file', function () {
+ expect(this.FetchUtils.fetchStream).to.not.have.been.called
+ })
+ })
+
describe('for a file update with an invalid filestore location', function () {
beforeEach(function (done) {
this.invalid_id = '000000000000000000000000'
@@ -443,7 +470,7 @@ describe('HistoryStoreManager', function () {
})
it('should not request the file from the filestore', function () {
- expect(this.request.get).to.not.have.been.called
+ expect(this.FetchUtils.fetchStream).to.not.have.been.called
})
})
@@ -503,6 +530,7 @@ describe('HistoryStoreManager', function () {
})
describe('when history-v1 confirms that the blob exists', function () {
beforeEach(function (done) {
+ this.FetchUtils.fetchNothing.resolves()
this.HistoryStoreManager.createBlobForUpdate(
this.projectId,
this.historyId,
diff --git a/services/project-history/test/unit/js/HttpController/HttpControllerTests.js b/services/project-history/test/unit/js/HttpController/HttpControllerTests.js
index 683fd9cea8..1b7adf0ef5 100644
--- a/services/project-history/test/unit/js/HttpController/HttpControllerTests.js
+++ b/services/project-history/test/unit/js/HttpController/HttpControllerTests.js
@@ -40,7 +40,7 @@ describe('HttpController', function () {
clearCachedHistoryId: sinon.stub().yields(),
}
this.ErrorRecorder = {
- record: sinon.stub().yields(),
+ clearError: sinon.stub().yields(),
}
this.LabelsManager = {
createLabel: sinon.stub(),
@@ -567,11 +567,7 @@ describe('HttpController', function () {
})
it('should clear any failure record', function () {
- this.ErrorRecorder.record.should.have.been.calledWith(
- this.projectId,
- 0,
- null
- )
+ this.ErrorRecorder.clearError.should.have.been.calledWith(this.projectId)
})
})
})
diff --git a/services/project-history/test/unit/js/SnapshotManager/SnapshotManagerTests.js b/services/project-history/test/unit/js/SnapshotManager/SnapshotManagerTests.js
index 7a10d33b2b..d6c52058c1 100644
--- a/services/project-history/test/unit/js/SnapshotManager/SnapshotManagerTests.js
+++ b/services/project-history/test/unit/js/SnapshotManager/SnapshotManagerTests.js
@@ -723,10 +723,10 @@ Four five six\
},
},
{
- // 'er th'
+ // 'er the la'
range: {
pos: 28,
- length: 5,
+ length: 9,
},
tracking: {
type: 'delete',
@@ -754,10 +754,23 @@ Four five six\
pos: 26,
length: 4,
},
+ // 'lazy'
+ {
+ pos: 35,
+ length: 4,
+ },
],
resolved: false,
},
{ id: 'comment-2', ranges: [], resolved: true },
+ {
+ id: 'comment-3',
+ ranges: [
+ // 'q'
+ { pos: 4, length: 1 },
+ ],
+ resolved: true,
+ },
],
})
this.data = await this.SnapshotManager.promises.getRangesSnapshot(
@@ -769,36 +782,29 @@ Four five six\
it('should move the comment to the start of the tracked delete and remove overlapping text', function () {
expect(this.data.comments[0].op.p).to.eq(2)
- expect(this.data.comments[0].op.c).to.eq('ck')
- })
-
- it('should remove overlapping text in middle of comment', function () {
- expect(this.data.comments[1].op.p).to.eq(5)
- expect(this.data.comments[1].op.c).to.eq('bown')
- })
-
- it('should remove overlapping text at end of comment', function () {
- expect(this.data.comments[2].op.p).to.eq(20)
- expect(this.data.comments[2].op.c).to.eq('ov')
+ expect(this.data.comments[0].op.c).to.eq('ck bown fox jumps ovzy')
})
it('should put resolved status in op', function () {
expect(this.data.comments[0].op.resolved).to.be.false
- expect(this.data.comments[1].op.resolved).to.be.false
- expect(this.data.comments[2].op.resolved).to.be.false
- expect(this.data.comments[3].op.resolved).to.be.true
+ expect(this.data.comments[1].op.resolved).to.be.true
+ expect(this.data.comments[2].op.resolved).to.be.true
})
it('should include thread id', function () {
expect(this.data.comments[0].op.t).to.eq('comment-1')
- expect(this.data.comments[1].op.t).to.eq('comment-1')
- expect(this.data.comments[2].op.t).to.eq('comment-1')
- expect(this.data.comments[3].op.t).to.eq('comment-2')
+ expect(this.data.comments[1].op.t).to.eq('comment-2')
+ expect(this.data.comments[2].op.t).to.eq('comment-3')
})
- it('should translated detached comment to zero length op', function () {
- expect(this.data.comments[3].op.p).to.eq(0)
- expect(this.data.comments[3].op.c).to.eq('')
+ it('should translate detached comment to zero length op', function () {
+ expect(this.data.comments[1].op.p).to.eq(0)
+ expect(this.data.comments[1].op.c).to.eq('')
+ })
+
+ it('should position a comment that falls entirely within a tracked delete next to that delete', function () {
+ expect(this.data.comments[2].op.p).to.eq(2)
+ expect(this.data.comments[2].op.c).to.eq('')
})
})
@@ -936,43 +942,21 @@ Four five six\
comments: [
{
op: {
- c: '',
+ c: 'brown fox jumps over the ',
p: 4,
t: 'comment-1',
resolved: false,
},
+ id: 'comment-1',
},
{
op: {
- c: 'brown',
- p: 4,
- t: 'comment-1',
- resolved: false,
- },
- },
- {
- op: {
- c: '',
- p: 29,
- t: 'comment-1',
- resolved: false,
- },
- },
- {
- op: {
- c: 'the',
+ c: 'the brown fox jumps over the',
p: 0,
t: 'comment-2',
resolved: true,
},
- },
- {
- op: {
- c: 'the',
- p: 25,
- t: 'comment-2',
- resolved: true,
- },
+ id: 'comment-2',
},
],
})
diff --git a/services/project-history/test/unit/js/SyncManager/SyncManagerTests.js b/services/project-history/test/unit/js/SyncManager/SyncManagerTests.js
index cbf52ac15e..1004ffd78d 100644
--- a/services/project-history/test/unit/js/SyncManager/SyncManagerTests.js
+++ b/services/project-history/test/unit/js/SyncManager/SyncManagerTests.js
@@ -114,7 +114,7 @@ describe('SyncManager', function () {
this.SnapshotManager = {
promises: {
- getLatestSnapshotFiles: sinon.stub(),
+ getLatestSnapshotFilesForChunk: sinon.stub(),
},
}
@@ -411,6 +411,39 @@ describe('SyncManager', function () {
})
})
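+ // resyncProjectStructureOnly controls whether doc contents are queued for a follow-up content resync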
+ it('records docs to resync when resyncProjectStructureOnly is not set', async function () {
+ const updates = [this.projectStructureSyncUpdate]
+ const { updates: filteredUpdates, syncState } =
+ await this.SyncManager.promises.skipUpdatesDuringSync(
+ this.projectId,
+ updates
+ )
+
+ expect(filteredUpdates).to.deep.equal([this.projectStructureSyncUpdate])
+ expect(syncState.toRaw()).to.deep.equal({
+ resyncProjectStructure: false,
+ resyncDocContents: ['new.tex'],
+ origin: { kind: 'history-resync' },
+ })
+ })
+
+ it('records no docs to resync with resyncProjectStructureOnly=true', async function () {
+ this.projectStructureSyncUpdate.resyncProjectStructureOnly = true
+ const updates = [this.projectStructureSyncUpdate]
+ const { updates: filteredUpdates, syncState } =
+ await this.SyncManager.promises.skipUpdatesDuringSync(
+ this.projectId,
+ updates
+ )
+
+ expect(filteredUpdates).to.deep.equal([this.projectStructureSyncUpdate])
+ expect(syncState.toRaw()).to.deep.equal({
+ resyncProjectStructure: false,
+ resyncDocContents: [],
+ origin: { kind: 'history-resync' },
+ })
+ })
+
it('allow project structure updates after project structure sync update', async function () {
const updates = [this.projectStructureSyncUpdate, this.renameUpdate]
const { updates: filteredUpdates, syncState } =
@@ -492,6 +525,7 @@ describe('SyncManager', function () {
_hash: 'abcde',
}
this.loadedSnapshotDoc = File.fromString(this.persistedDocContent)
+ this.mostRecentChunk = 'fake chunk'
this.fileMap = {
'main.tex': {
isEditable: sinon.stub().returns(true),
@@ -517,7 +551,7 @@ describe('SyncManager', function () {
.returns('another.tex')
this.UpdateTranslator._convertPathname.withArgs('1.png').returns('1.png')
this.UpdateTranslator._convertPathname.withArgs('2.png').returns('2.png')
- this.SnapshotManager.promises.getLatestSnapshotFiles.resolves(
+ this.SnapshotManager.promises.getLatestSnapshotFilesForChunk.resolves(
this.fileMap
)
})
@@ -527,13 +561,14 @@ describe('SyncManager', function () {
const expandedUpdates = await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
expect(expandedUpdates).to.equal(updates)
- expect(this.SnapshotManager.promises.getLatestSnapshotFiles).to.not.have
- .been.called
+ expect(this.SnapshotManager.promises.getLatestSnapshotFilesForChunk).to
+ .not.have.been.called
expect(this.extendLock).to.not.have.been.called
})
@@ -549,6 +584,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -562,6 +598,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -586,6 +623,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -621,6 +659,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -631,7 +670,44 @@ describe('SyncManager', function () {
file: newFile.file,
url: newFile.url,
hash: 'hash-42',
- metadata: undefined,
+ meta: {
+ resync: true,
+ ts: TIMESTAMP,
+ origin: { kind: 'history-resync' },
+ },
+ },
+ ])
+ expect(this.extendLock).to.have.been.called
+ })
+
+ it('queues file additions for missing regular files w/o url', async function () {
+ const newFile = {
+ path: '2.png',
+ file: {},
+ _hash: 'hash-42',
+ createdBlob: true,
+ }
+ const updates = [
+ resyncProjectStructureUpdate(
+ [this.persistedDoc],
+ [this.persistedFile, newFile]
+ ),
+ ]
+ const expandedUpdates =
+ await this.SyncManager.promises.expandSyncUpdates(
+ this.projectId,
+ this.historyId,
+ this.mostRecentChunk,
+ updates,
+ this.extendLock
+ )
+
+ expect(expandedUpdates).to.deep.equal([
+ {
+ pathname: newFile.path,
+ file: newFile.file,
+ hash: 'hash-42',
+ createdBlob: true,
meta: {
resync: true,
ts: TIMESTAMP,
@@ -663,6 +739,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -702,6 +779,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -738,6 +816,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -757,7 +836,6 @@ describe('SyncManager', function () {
file: fileWichWasADoc.file,
url: fileWichWasADoc.url,
hash: 'other-hash',
- metadata: undefined,
meta: {
resync: true,
ts: TIMESTAMP,
@@ -789,6 +867,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -840,6 +919,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -886,6 +966,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -929,6 +1010,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -963,6 +1045,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -991,6 +1074,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -1016,6 +1100,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -1035,7 +1120,53 @@ describe('SyncManager', function () {
file: persistedFileWithNewContent.file,
url: persistedFileWithNewContent.url,
hash: 'anotherhashvalue',
- metadata: undefined,
+ meta: {
+ resync: true,
+ ts: TIMESTAMP,
+ origin: { kind: 'history-resync' },
+ },
+ },
+ ])
+ expect(this.extendLock).to.have.been.called
+ })
+
+ it('removes and re-adds binary files w/o url if they do not have the same hash', async function () {
+ const persistedFileWithNewContent = {
+ _hash: 'anotherhashvalue',
+ hello: 'world',
+ path: '1.png',
+ createdBlob: true,
+ }
+ const updates = [
+ resyncProjectStructureUpdate(
+ [this.persistedDoc],
+ [persistedFileWithNewContent]
+ ),
+ ]
+ const expandedUpdates =
+ await this.SyncManager.promises.expandSyncUpdates(
+ this.projectId,
+ this.historyId,
+ this.mostRecentChunk,
+ updates,
+ this.extendLock
+ )
+
+ expect(expandedUpdates).to.deep.equal([
+ {
+ pathname: persistedFileWithNewContent.path,
+ new_pathname: '',
+ meta: {
+ resync: true,
+ ts: TIMESTAMP,
+ origin: { kind: 'history-resync' },
+ },
+ },
+ {
+ pathname: persistedFileWithNewContent.path,
+ file: persistedFileWithNewContent.file,
+ hash: 'anotherhashvalue',
+ createdBlob: true,
meta: {
resync: true,
ts: TIMESTAMP,
@@ -1059,6 +1190,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -1083,6 +1215,7 @@ describe('SyncManager', function () {
this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -1096,6 +1229,7 @@ describe('SyncManager', function () {
this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -1115,6 +1249,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -1141,6 +1276,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -1179,6 +1315,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -1223,6 +1360,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -1248,6 +1386,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -1284,6 +1423,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -1338,6 +1478,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -1360,6 +1501,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -1403,6 +1545,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -1437,6 +1580,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -1480,6 +1624,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -1534,6 +1679,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -1563,6 +1709,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -1653,6 +1800,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -1674,6 +1822,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -1715,6 +1864,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -1756,6 +1906,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
@@ -1831,6 +1982,7 @@ describe('SyncManager', function () {
await this.SyncManager.promises.expandSyncUpdates(
this.projectId,
this.historyId,
+ this.mostRecentChunk,
updates,
this.extendLock
)
diff --git a/services/project-history/test/unit/js/UpdateCompressor/UpdateCompressorTests.js b/services/project-history/test/unit/js/UpdateCompressor/UpdateCompressorTests.js
index 8124e30e0b..c8d50b16f4 100644
--- a/services/project-history/test/unit/js/UpdateCompressor/UpdateCompressorTests.js
+++ b/services/project-history/test/unit/js/UpdateCompressor/UpdateCompressorTests.js
@@ -12,6 +12,7 @@ describe('UpdateCompressor', function () {
this.user_id = 'user-id-1'
this.other_user_id = 'user-id-2'
this.doc_id = 'mock-doc-id'
+ this.doc_hash = 'doc-hash'
this.ts1 = Date.now()
this.ts2 = Date.now() + 1000
})
@@ -247,6 +248,50 @@ describe('UpdateCompressor', function () {
},
])
})
+
+ it('should set the doc hash on the last split update only', function () {
+ const meta = {
+ ts: this.ts1,
+ user_id: this.user_id,
+ }
+ expect(
+ this.UpdateCompressor.convertToSingleOpUpdates([
+ {
+ op: [
+ { p: 0, i: 'foo' },
+ { p: 6, i: 'bar' },
+ ],
+ meta: { ...meta, doc_hash: 'hash1' },
+ v: 42,
+ },
+ {
+ op: [{ p: 10, i: 'baz' }],
+ meta: { ...meta, doc_hash: 'hash2' },
+ v: 43,
+ },
+ {
+ op: [
+ { p: 0, d: 'foo' },
+ { p: 20, i: 'quux' },
+ { p: 3, d: 'bar' },
+ ],
+ meta: { ...meta, doc_hash: 'hash3' },
+ v: 44,
+ },
+ ])
+ ).to.deep.equal([
+ { op: { p: 0, i: 'foo' }, meta, v: 42 },
+ { op: { p: 6, i: 'bar' }, meta: { ...meta, doc_hash: 'hash1' }, v: 42 },
+ {
+ op: { p: 10, i: 'baz' },
+ meta: { ...meta, doc_hash: 'hash2' },
+ v: 43,
+ },
+ { op: { p: 0, d: 'foo' }, meta, v: 44 },
+ { op: { p: 20, i: 'quux' }, meta, v: 44 },
+ { op: { p: 3, d: 'bar' }, meta: { ...meta, doc_hash: 'hash3' }, v: 44 },
+ ])
+ })
})
describe('concatUpdatesWithSameVersion', function () {
@@ -376,6 +421,48 @@ describe('UpdateCompressor', function () {
},
])
})
+
+ it("should keep the doc hash only when it's on the last update", function () {
+ const meta = { ts: this.ts1, user_id: this.user_id }
+ const baseUpdate = { doc: this.doc_id, pathname: 'main.tex', meta }
+ const updates = [
+ { ...baseUpdate, op: { p: 0, i: 'foo' }, v: 1 },
+ {
+ ...baseUpdate,
+ op: { p: 10, i: 'bar' },
+ meta: { ...meta, doc_hash: 'hash1' },
+ v: 1,
+ },
+ {
+ ...baseUpdate,
+ op: { p: 20, i: 'baz' },
+ meta: { ...meta, doc_hash: 'hash2' },
+ v: 2,
+ },
+ { ...baseUpdate, op: { p: 30, i: 'quux' }, v: 2 },
+ ]
+ expect(
+ this.UpdateCompressor.concatUpdatesWithSameVersion(updates)
+ ).to.deep.equal([
+ {
+ ...baseUpdate,
+ op: [
+ { p: 0, i: 'foo' },
+ { p: 10, i: 'bar' },
+ ],
+ meta: { ...meta, doc_hash: 'hash1' },
+ v: 1,
+ },
+ {
+ ...baseUpdate,
+ op: [
+ { p: 20, i: 'baz' },
+ { p: 30, i: 'quux' },
+ ],
+ v: 2,
+ },
+ ])
+ })
})
describe('compress', function () {
@@ -1437,5 +1524,68 @@ describe('UpdateCompressor', function () {
])
})
})
+
+ describe('doc hash', function () {
+ it("should keep the doc hash if it's on the last update", function () {
+ const meta = { ts: this.ts1, user_id: this.user_id }
+ expect(
+ this.UpdateCompressor.compressUpdates([
+ { op: { p: 3, i: 'foo' }, meta, v: 42 },
+ {
+ op: { p: 6, i: 'bar' },
+ meta: { ...meta, doc_hash: 'hash1' },
+ v: 43,
+ },
+ ])
+ ).to.deep.equal([
+ {
+ op: { p: 3, i: 'foobar' },
+ meta: { ...meta, doc_hash: 'hash1' },
+ v: 43,
+ },
+ ])
+ })
+
+ it("should not keep the doc hash if it's not on the last update", function () {
+ const meta = { ts: this.ts1, user_id: this.user_id }
+ expect(
+ this.UpdateCompressor.compressUpdates([
+ {
+ op: { p: 3, i: 'foo' },
+ meta: { ...meta, doc_hash: 'hash1' },
+ v: 42,
+ },
+ { op: { p: 6, i: 'bar' }, meta, v: 43 },
+ ])
+ ).to.deep.equal([
+ {
+ op: { p: 3, i: 'foobar' },
+ meta,
+ v: 43,
+ },
+ ])
+ })
+
+ it('special case for delete + insert triggering diff', function () {
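+ // the delete/insert pair stays unmerged; the insert keeps the doc hash and its doc_length accounts for the preceding delete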
+ const meta = { ts: this.ts1, user_id: this.user_id, doc_length: 10 }
+ expect(
+ this.UpdateCompressor.compressUpdates([
+ { op: { p: 3, d: 'foo' }, meta, v: 42 },
+ {
+ op: { p: 3, i: 'bar' },
+ meta: { ...meta, doc_hash: 'hash1' },
+ v: 43,
+ },
+ ])
+ ).to.deep.equal([
+ { op: { p: 3, d: 'foo' }, meta, v: 43 },
+ {
+ op: { p: 3, i: 'bar' },
+ meta: { ...meta, doc_length: 7, doc_hash: 'hash1' },
+ v: 43,
+ },
+ ])
+ })
+ })
})
})
diff --git a/services/project-history/test/unit/js/UpdatesManager/UpdatesProcessorTests.js b/services/project-history/test/unit/js/UpdatesManager/UpdatesProcessorTests.js
index a5d2846cbc..fcc0918e11 100644
--- a/services/project-history/test/unit/js/UpdatesManager/UpdatesProcessorTests.js
+++ b/services/project-history/test/unit/js/UpdatesManager/UpdatesProcessorTests.js
@@ -1,17 +1,3 @@
-/* eslint-disable
- mocha/no-nested-tests,
- no-return-assign,
- no-undef,
- no-unused-vars,
-*/
-// TODO: This file was created by bulk-decaffeinate.
-// Fix any style issues and re-enable lint.
-/*
- * decaffeinate suggestions:
- * DS102: Remove unnecessary code created because of implicit returns
- * DS207: Consider shorter variations of null checks
- * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
- */
import sinon from 'sinon'
import { expect } from 'chai'
import { strict as esmock } from 'esmock'
@@ -20,14 +6,14 @@ import * as Errors from '../../../../app/js/Errors.js'
const MODULE_PATH = '../../../../app/js/UpdatesProcessor.js'
describe('UpdatesProcessor', function () {
- before(async function () {
+ beforeEach(async function () {
this.extendLock = sinon.stub()
this.BlobManager = {
createBlobsForUpdates: sinon.stub(),
}
this.HistoryStoreManager = {
getMostRecentVersion: sinon.stub(),
- sendChanges: sinon.stub().yields(),
+ sendChanges: sinon.stub().yields(null, { resyncNeeded: true }),
}
this.LockManager = {
runWithLock: sinon.spy((key, runner, callback) =>
@@ -36,7 +22,7 @@ describe('UpdatesProcessor', function () {
}
this.RedisManager = {}
this.UpdateCompressor = {
- compressRawUpdates: sinon.stub(),
+ compressRawUpdatesWithMetricsCb: sinon.stub(),
}
this.UpdateTranslator = {
convertToChanges: sinon.stub(),
@@ -53,7 +39,11 @@ describe('UpdatesProcessor', function () {
}
this.ErrorRecorder = {
getLastFailure: sinon.stub(),
- record: sinon.stub().yields(),
+ record: sinon.stub().yields(null, { attempts: 1 }),
+ }
+ this.RetryManager = {
+ isFirstFailure: sinon.stub().returns(true),
+ isHardFailure: sinon.stub().returns(false),
}
this.Profiler = {
Profiler: class {
@@ -101,6 +91,7 @@ describe('UpdatesProcessor', function () {
'../../../../app/js/SyncManager.js': this.SyncManager,
'../../../../app/js/ErrorRecorder.js': this.ErrorRecorder,
'../../../../app/js/Profiler.js': this.Profiler,
+ '../../../../app/js/RetryManager.js': this.RetryManager,
'../../../../app/js/Errors.js': Errors,
'@overleaf/metrics': this.Metrics,
'@overleaf/settings': this.Settings,
@@ -109,7 +100,7 @@ describe('UpdatesProcessor', function () {
this.project_id = 'project-id-123'
this.ol_project_id = 'ol-project-id-234'
this.callback = sinon.stub()
- return (this.temporary = 'temp-mock')
+ this.temporary = 'temp-mock'
})
describe('processUpdatesForProject', function () {
@@ -118,26 +109,26 @@ describe('UpdatesProcessor', function () {
this.queueSize = 445
this.UpdatesProcessor._mocks._countAndProcessUpdates = sinon
.stub()
- .callsArgWith(3, this.error, this.queueSize)
+ .callsArgWith(3, this.error, { queueSize: this.queueSize })
})
describe('when there is no existing error', function () {
beforeEach(function (done) {
this.ErrorRecorder.getLastFailure.yields()
- return this.UpdatesProcessor.processUpdatesForProject(
- this.project_id,
- done
- )
+ this.UpdatesProcessor.processUpdatesForProject(this.project_id, err => {
+ expect(err).to.equal(this.error)
+ done()
+ })
})
it('processes updates', function () {
- return this.UpdatesProcessor._mocks._countAndProcessUpdates
+ this.UpdatesProcessor._mocks._countAndProcessUpdates
.calledWith(this.project_id)
.should.equal(true)
})
- return it('records errors', function () {
- return this.ErrorRecorder.record
+ it('records errors', function () {
+ this.ErrorRecorder.record
.calledWith(this.project_id, this.queueSize, this.error)
.should.equal(true)
})
@@ -154,14 +145,14 @@ describe('UpdatesProcessor', function () {
this.WebApiManager.getHistoryId.yields(null)
})
- return it('returns null', function (done) {
- return this.UpdatesProcessor._getHistoryId(
+ it('returns null', function (done) {
+ this.UpdatesProcessor._getHistoryId(
this.project_id,
this.updates,
(error, projectHistoryId) => {
expect(error).to.be.null
expect(projectHistoryId).to.be.null
- return done()
+ done()
}
)
})
@@ -169,102 +160,102 @@ describe('UpdatesProcessor', function () {
describe('projectHistoryId is not present in updates', function () {
beforeEach(function () {
- return (this.updates = [
+ this.updates = [
{ p: 0, i: 'a' },
{ p: 1, i: 's' },
- ])
+ ]
})
it('returns the id from web', function (done) {
this.projectHistoryId = '1234'
this.WebApiManager.getHistoryId.yields(null, this.projectHistoryId)
- return this.UpdatesProcessor._getHistoryId(
+ this.UpdatesProcessor._getHistoryId(
this.project_id,
this.updates,
(error, projectHistoryId) => {
expect(error).to.be.null
expect(projectHistoryId).equal(this.projectHistoryId)
- return done()
+ done()
}
)
})
- return it('returns errors from web', function (done) {
+ it('returns errors from web', function (done) {
this.error = new Error('oh no!')
this.WebApiManager.getHistoryId.yields(this.error)
- return this.UpdatesProcessor._getHistoryId(
+ this.UpdatesProcessor._getHistoryId(
this.project_id,
this.updates,
error => {
expect(error).to.equal(this.error)
- return done()
+ done()
}
)
})
})
- return describe('projectHistoryId is present in some updates', function () {
+ describe('projectHistoryId is present in some updates', function () {
beforeEach(function () {
this.projectHistoryId = '1234'
- return (this.updates = [
+ this.updates = [
{ p: 0, i: 'a' },
{ p: 1, i: 's', projectHistoryId: this.projectHistoryId },
{ p: 2, i: 'd', projectHistoryId: this.projectHistoryId },
- ])
+ ]
})
it('returns an error if the id is inconsistent between updates', function (done) {
this.updates[1].projectHistoryId = 2345
- return this.UpdatesProcessor._getHistoryId(
+ this.UpdatesProcessor._getHistoryId(
this.project_id,
this.updates,
error => {
expect(error.message).to.equal(
'inconsistent project history id between updates'
)
- return done()
+ done()
}
)
})
it('returns an error if the id is inconsistent between updates and web', function (done) {
this.WebApiManager.getHistoryId.yields(null, 2345)
- return this.UpdatesProcessor._getHistoryId(
+ this.UpdatesProcessor._getHistoryId(
this.project_id,
this.updates,
error => {
expect(error.message).to.equal(
'inconsistent project history id between updates and web'
)
- return done()
+ done()
}
)
})
it('returns the id if it is consistent between updates and web', function (done) {
this.WebApiManager.getHistoryId.yields(null, this.projectHistoryId)
- return this.UpdatesProcessor._getHistoryId(
+ this.UpdatesProcessor._getHistoryId(
this.project_id,
this.updates,
(error, projectHistoryId) => {
expect(error).to.be.null
expect(projectHistoryId).equal(this.projectHistoryId)
- return done()
+ done()
}
)
})
- return it('returns the id if it is consistent between updates but unavaiable in web', function (done) {
+ it('returns the id if it is consistent between updates but unavailable in web', function (done) {

this.WebApiManager.getHistoryId.yields(new Error('oh no!'))
- return this.UpdatesProcessor._getHistoryId(
+ this.UpdatesProcessor._getHistoryId(
this.project_id,
this.updates,
(error, projectHistoryId) => {
expect(error).to.be.null
expect(projectHistoryId).equal(this.projectHistoryId)
- return done()
+ done()
}
)
})
@@ -289,10 +280,14 @@ describe('UpdatesProcessor', function () {
this.newSyncState = { resyncProjectStructure: false }
this.extendLock = sinon.stub().yields()
+ this.mostRecentChunk = 'fake-chunk'
this.HistoryStoreManager.getMostRecentVersion.yields(
null,
- this.mostRecentVersionInfo
+ this.mostRecentVersionInfo,
+ null,
+ '_lastChange',
+ this.mostRecentChunk
)
this.SyncManager.skipUpdatesDuringSync.yields(
null,
@@ -300,11 +295,14 @@ describe('UpdatesProcessor', function () {
this.newSyncState
)
this.SyncManager.expandSyncUpdates.callsArgWith(
- 4,
+ 5,
null,
this.expandedUpdates
)
- this.UpdateCompressor.compressRawUpdates.returns(this.compressedUpdates)
+ this.UpdateCompressor.compressRawUpdatesWithMetricsCb.yields(
+ null,
+ this.compressedUpdates
+ )
this.BlobManager.createBlobsForUpdates.callsArgWith(
4,
null,
@@ -320,44 +318,45 @@ describe('UpdatesProcessor', function () {
this.ol_project_id,
this.rawUpdates,
this.extendLock,
- err => {
- this.callback(err)
+ (err, flushResponse) => {
+ this.callback(err, flushResponse)
done()
}
)
})
it('should get the latest version id', function () {
- return this.HistoryStoreManager.getMostRecentVersion.should.have.been.calledWith(
+ this.HistoryStoreManager.getMostRecentVersion.should.have.been.calledWith(
this.project_id,
this.ol_project_id
)
})
it('should skip updates when resyncing', function () {
- return this.SyncManager.skipUpdatesDuringSync.should.have.been.calledWith(
+ this.SyncManager.skipUpdatesDuringSync.should.have.been.calledWith(
this.project_id,
this.rawUpdates
)
})
it('should expand sync updates', function () {
- return this.SyncManager.expandSyncUpdates.should.have.been.calledWith(
+ this.SyncManager.expandSyncUpdates.should.have.been.calledWith(
this.project_id,
this.ol_project_id,
+ this.mostRecentChunk,
this.filteredUpdates,
this.extendLock
)
})
it('should compress updates', function () {
- return this.UpdateCompressor.compressRawUpdates.should.have.been.calledWith(
+ this.UpdateCompressor.compressRawUpdatesWithMetricsCb.should.have.been.calledWith(
this.expandedUpdates
)
})
it('should create any blobs for the updates', function () {
- return this.BlobManager.createBlobsForUpdates.should.have.been.calledWith(
+ this.BlobManager.createBlobsForUpdates.should.have.been.calledWith(
this.project_id,
this.ol_project_id,
this.compressedUpdates
@@ -365,14 +364,14 @@ describe('UpdatesProcessor', function () {
})
it('should convert the updates into a change requests', function () {
- return this.UpdateTranslator.convertToChanges.should.have.been.calledWith(
+ this.UpdateTranslator.convertToChanges.should.have.been.calledWith(
this.project_id,
this.updatesWithBlobs
)
})
it('should send the change request to the history store', function () {
- return this.HistoryStoreManager.sendChanges.should.have.been.calledWith(
+ this.HistoryStoreManager.sendChanges.should.have.been.calledWith(
this.project_id,
this.ol_project_id,
['change']
@@ -380,14 +379,80 @@ describe('UpdatesProcessor', function () {
})
it('should set the sync state', function () {
- return this.SyncManager.setResyncState.should.have.been.calledWith(
+ this.SyncManager.setResyncState.should.have.been.calledWith(
this.project_id,
this.newSyncState
)
})
- it('should call the callback with no error', function () {
- return this.callback.should.have.been.called
+ it('should call the callback with no error and flush response', function () {
+ this.callback.should.have.been.calledWith(null, { resyncNeeded: true })
+ })
+ })
+
+ describe('no updates', function () {
+ beforeEach(function (done) {
+ this.SyncManager.skipUpdatesDuringSync.yields(
+ null,
+ [],
+ this.newSyncState
+ )
+ this.UpdatesProcessor._processUpdates(
+ this.project_id,
+ this.ol_project_id,
+ this.rawUpdates,
+ this.extendLock,
+ (err, flushResponse) => {
+ this.callback(err, flushResponse)
+ done()
+ }
+ )
+ })
+
+ it('should not get the latest version id', function () {
+ this.HistoryStoreManager.getMostRecentVersion.should.not.have.been.calledWith(
+ this.project_id,
+ this.ol_project_id
+ )
+ })
+
+ it('should skip updates when resyncing', function () {
+ this.SyncManager.skipUpdatesDuringSync.should.have.been.calledWith(
+ this.project_id,
+ this.rawUpdates
+ )
+ })
+
+ it('should not expand sync updates', function () {
+ this.SyncManager.expandSyncUpdates.should.not.have.been.called
+ })
+
+ it('should not compress updates', function () {
+ this.UpdateCompressor.compressRawUpdatesWithMetricsCb.should.not.have
+ .been.called
+ })
+
+ it('should not create any blobs for the updates', function () {
+ this.BlobManager.createBlobsForUpdates.should.not.have.been.called
+ })
+
+ it('should not convert the updates into a change requests', function () {
+ this.UpdateTranslator.convertToChanges.should.not.have.been.called
+ })
+
+ it('should not send the change request to the history store', function () {
+ this.HistoryStoreManager.sendChanges.should.not.have.been.called
+ })
+
+ it('should set the sync state', function () {
+ this.SyncManager.setResyncState.should.have.been.calledWith(
+ this.project_id,
+ this.newSyncState
+ )
+ })
+
+ it('should call the callback with a fake flush response', function () {
+ this.callback.should.have.been.calledWith(null, { resyncNeeded: false })
})
})
@@ -415,8 +480,8 @@ describe('UpdatesProcessor', function () {
})
})
- return describe('_skipAlreadyAppliedUpdates', function () {
- before(function () {
+ describe('_skipAlreadyAppliedUpdates', function () {
+ beforeEach(function () {
this.UpdateTranslator.isProjectStructureUpdate.callsFake(
update => update.version != null
)
@@ -424,49 +489,47 @@ describe('UpdatesProcessor', function () {
})
describe('with all doc ops in order', function () {
- before(function () {
+ beforeEach(function () {
this.updates = [
{ doc: 'id', v: 1 },
{ doc: 'id', v: 2 },
{ doc: 'id', v: 3 },
{ doc: 'id', v: 4 },
]
- return (this.updatesToApply =
- this.UpdatesProcessor._skipAlreadyAppliedUpdates(
- this.project_id,
- this.updates,
- { docs: {} }
- ))
+ this.updatesToApply = this.UpdatesProcessor._skipAlreadyAppliedUpdates(
+ this.project_id,
+ this.updates,
+ { docs: {} }
+ )
})
- return it('should return the original updates', function () {
- return expect(this.updatesToApply).to.eql(this.updates)
+ it('should return the original updates', function () {
+ expect(this.updatesToApply).to.eql(this.updates)
})
})
describe('with all project ops in order', function () {
- before(function () {
+ beforeEach(function () {
this.updates = [
{ version: 1 },
{ version: 2 },
{ version: 3 },
{ version: 4 },
]
- return (this.updatesToApply =
- this.UpdatesProcessor._skipAlreadyAppliedUpdates(
- this.project_id,
- this.updates,
- { docs: {} }
- ))
+ this.updatesToApply = this.UpdatesProcessor._skipAlreadyAppliedUpdates(
+ this.project_id,
+ this.updates,
+ { docs: {} }
+ )
})
- return it('should return the original updates', function () {
- return expect(this.updatesToApply).to.eql(this.updates)
+ it('should return the original updates', function () {
+ expect(this.updatesToApply).to.eql(this.updates)
})
})
describe('with all multiple doc and ops in order', function () {
- before(function () {
+ beforeEach(function () {
this.updates = [
{ doc: 'id1', v: 1 },
{ doc: 'id1', v: 2 },
@@ -481,78 +544,60 @@ describe('UpdatesProcessor', function () {
{ version: 3 },
{ version: 4 },
]
- return (this.updatesToApply =
- this.UpdatesProcessor._skipAlreadyAppliedUpdates(
- this.project_id,
- this.updates,
- { docs: {} }
- ))
+ this.updatesToApply = this.UpdatesProcessor._skipAlreadyAppliedUpdates(
+ this.project_id,
+ this.updates,
+ { docs: {} }
+ )
})
- return it('should return the original updates', function () {
- return expect(this.updatesToApply).to.eql(this.updates)
+ it('should return the original updates', function () {
+ expect(this.updatesToApply).to.eql(this.updates)
})
})
describe('with doc ops out of order', function () {
- before(function () {
+ beforeEach(function () {
this.updates = [
{ doc: 'id', v: 1 },
{ doc: 'id', v: 2 },
{ doc: 'id', v: 4 },
{ doc: 'id', v: 3 },
]
- this.skipFn = sinon.spy(
- this.UpdatesProcessor._mocks,
- '_skipAlreadyAppliedUpdates'
- )
- try {
- return (this.updatesToApply =
- this.UpdatesProcessor._skipAlreadyAppliedUpdates(
- this.project_id,
- this.updates,
- { docs: {} }
- ))
- } catch (error) {}
})
- after(function () {
- return this.skipFn.restore()
- })
-
- return it('should throw an exception', function () {
- return this.skipFn.threw('OpsOutOfOrderError').should.equal(true)
+ it('should throw an exception', function () {
+ expect(() => {
+ this.UpdatesProcessor._skipAlreadyAppliedUpdates(
+ this.project_id,
+ this.updates,
+ { docs: {} }
+ )
+ }).to.throw(Errors.OpsOutOfOrderError)
})
})
- return describe('with project ops out of order', function () {
- before(function () {
+ describe('with project ops out of order', function () {
+ beforeEach(function () {
+ this.UpdateTranslator.isProjectStructureUpdate.callsFake(
+ update => update.version != null
+ )
this.updates = [
{ version: 1 },
{ version: 2 },
{ version: 4 },
{ version: 3 },
]
- this.skipFn = sinon.spy(
- this.UpdatesProcessor._mocks,
- '_skipAlreadyAppliedUpdates'
- )
- try {
- return (this.updatesToApply =
- this.UpdatesProcessor._skipAlreadyAppliedUpdates(
- this.project_id,
- this.updates,
- { docs: {} }
- ))
- } catch (error) {}
})
- after(function () {
- return this.skipFn.restore()
- })
-
- return it('should throw an exception', function () {
- return this.skipFn.threw('OpsOutOfOrderError').should.equal(true)
+ it('should throw an exception', function () {
+ expect(() => {
+ this.UpdatesProcessor._skipAlreadyAppliedUpdates(
+ this.project_id,
+ this.updates,
+ { docs: {} }
+ )
+ }).to.throw(Errors.OpsOutOfOrderError)
})
})
})
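
The rewritten out-of-order tests above assert the thrown error type directly instead of spying on the internal function. The property they check is that per-doc versions and project-structure versions must be non-decreasing; a rough sketch of that check (illustrative only; the real code throws Errors.OpsOutOfOrderError from _skipAlreadyAppliedUpdates):

function assertUpdatesInOrder(updates, isProjectStructureUpdate) {
  const lastDocVersion = new Map()
  let lastProjectVersion = -Infinity
  for (const update of updates) {
    if (isProjectStructureUpdate(update)) {
      // project structure updates carry a project-level `version`
      if (update.version < lastProjectVersion) {
        throw new Error('project structure version applied out of order')
      }
      lastProjectVersion = update.version
    } else {
      // doc updates carry a per-doc `v`
      const previous = lastDocVersion.get(update.doc) ?? -Infinity
      if (update.v < previous) {
        throw new Error('doc version applied out of order')
      }
      lastDocVersion.set(update.doc, update.v)
    }
  }
}

// assertUpdatesInOrder([{ doc: 'id', v: 1 }, { doc: 'id', v: 2 }], u => u.version != null) passes;
// swapping the two versions makes it throw, mirroring the tests above.
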
diff --git a/services/real-time/.gitignore b/services/real-time/.gitignore
deleted file mode 100644
index 80bac793a7..0000000000
--- a/services/real-time/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-node_modules
-forever
-
-# managed by dev-environment$ bin/update_build_scripts
-.npmrc
diff --git a/services/real-time/.nvmrc b/services/real-time/.nvmrc
index 2a393af592..fc37597bcc 100644
--- a/services/real-time/.nvmrc
+++ b/services/real-time/.nvmrc
@@ -1 +1 @@
-20.18.0
+22.17.0
diff --git a/services/real-time/Dockerfile b/services/real-time/Dockerfile
index 21832e7427..4fb5283c1e 100644
--- a/services/real-time/Dockerfile
+++ b/services/real-time/Dockerfile
@@ -2,7 +2,7 @@
# Instead run bin/update_build_scripts from
# https://github.com/overleaf/internal/
-FROM node:20.18.0 AS base
+FROM node:22.17.0 AS base
WORKDIR /overleaf/services/real-time
diff --git a/services/real-time/Makefile b/services/real-time/Makefile
index 6ef424ce91..7d2bcacb77 100644
--- a/services/real-time/Makefile
+++ b/services/real-time/Makefile
@@ -32,12 +32,30 @@ HERE=$(shell pwd)
MONOREPO=$(shell cd ../../ && pwd)
# Run the linting commands in the scope of the monorepo.
# Eslint and prettier (plus some configs) are on the root.
-RUN_LINTING = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(HERE) node:20.18.0 npm run --silent
+RUN_LINTING = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(HERE) node:22.17.0 npm run --silent
RUN_LINTING_CI = docker run --rm --volume $(MONOREPO)/.editorconfig:/overleaf/.editorconfig --volume $(MONOREPO)/.eslintignore:/overleaf/.eslintignore --volume $(MONOREPO)/.eslintrc:/overleaf/.eslintrc --volume $(MONOREPO)/.prettierignore:/overleaf/.prettierignore --volume $(MONOREPO)/.prettierrc:/overleaf/.prettierrc --volume $(MONOREPO)/tsconfig.backend.json:/overleaf/tsconfig.backend.json ci/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER) npm run --silent
# Same but from the top of the monorepo
-RUN_LINTING_MONOREPO = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(MONOREPO) node:20.18.0 npm run --silent
+RUN_LINTING_MONOREPO = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(MONOREPO) node:22.17.0 npm run --silent
+
+SHELLCHECK_OPTS = \
+ --shell=bash \
+ --external-sources
+SHELLCHECK_COLOR := $(if $(CI),--color=never,--color)
+SHELLCHECK_FILES := { git ls-files "*.sh" -z; git grep -Plz "\A\#\!.*bash"; } | sort -zu
+
+shellcheck:
+ @$(SHELLCHECK_FILES) | xargs -0 -r docker run --rm -v $(HERE):/mnt -w /mnt \
+ koalaman/shellcheck:stable $(SHELLCHECK_OPTS) $(SHELLCHECK_COLOR)
+
+shellcheck_fix:
+ @$(SHELLCHECK_FILES) | while IFS= read -r -d '' file; do \
+ diff=$$(docker run --rm -v $(HERE):/mnt -w /mnt koalaman/shellcheck:stable $(SHELLCHECK_OPTS) --format=diff "$$file" 2>/dev/null); \
+ if [ -n "$$diff" ] && ! echo "$$diff" | patch -p1 >/dev/null 2>&1; then echo "\033[31m$$file\033[0m"; \
+ elif [ -n "$$diff" ]; then echo "$$file"; \
+ else echo "\033[2m$$file\033[0m"; fi \
+ done
format:
$(RUN_LINTING) format
@@ -63,7 +81,7 @@ typecheck:
typecheck_ci:
$(RUN_LINTING_CI) types:check
-test: format lint typecheck test_unit test_acceptance
+test: format lint typecheck shellcheck test_unit test_acceptance
test_unit:
ifneq (,$(wildcard test/unit))
@@ -130,6 +148,7 @@ publish:
lint lint_fix \
build_types typecheck \
lint_ci format_ci typecheck_ci \
+ shellcheck shellcheck_fix \
test test_clean test_unit test_unit_clean \
test_acceptance test_acceptance_debug test_acceptance_pre_run \
test_acceptance_run test_acceptance_run_debug test_acceptance_clean \
diff --git a/services/real-time/app.js b/services/real-time/app.js
index 61b3f0cad1..a66833d9d9 100644
--- a/services/real-time/app.js
+++ b/services/real-time/app.js
@@ -82,16 +82,45 @@ io.configure(function () {
// See http://answers.dotcloud.com/question/578/problem-with-websocket-over-ssl-in-safari-with
io.set('match origin protocol', true)
- // gzip uses a Node 0.8.x method of calling the gzip program which
- // doesn't work with 0.6.x
- // io.enable('browser client gzip')
- io.set('transports', [
- 'websocket',
- 'flashsocket',
- 'htmlfile',
- 'xhr-polling',
- 'jsonp-polling',
- ])
+ io.set('transports', ['websocket', 'xhr-polling'])
+
+ if (Settings.allowedCorsOrigins) {
+ // Create a regex for matching origins, allowing wildcard subdomains
+ const allowedCorsOriginsRegex = new RegExp(
+ `^${Settings.allowedCorsOrigins.replaceAll('.', '\\.').replace('://*', '://[^.]+')}(?::443)?$`
+ )
+
+ io.set('origins', function (origin, req) {
+ if (!origin) {
+ // There is no origin or referer header - this is likely a same-site request.
+ logger.warn({ req }, 'No origin or referer header')
+ return true
+ }
+ const normalizedOrigin = URL.parse(origin).origin
+ const originIsValid = allowedCorsOriginsRegex.test(normalizedOrigin)
+
+ if (req.headers.origin) {
+ if (!originIsValid) {
+ logger.warn(
+ { normalizedOrigin, origin, req },
+ 'Origin header does not match allowed origins'
+ )
+ }
+ return originIsValid
+ }
+
+ if (!originIsValid) {
+ // There is no Origin header and the Referrer does not satisfy the
+ // constraints. We're going to pass this anyway for now but log it
+ logger.warn(
+ { req, referer: req.headers.referer },
+ 'Referrer header does not match allowed origins'
+ )
+ }
+
+ return true
+ })
+ }
})
// Serve socket.io.js client file from imported dist folder
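
The origin check above compiles the single configured pattern into one regex and compares each request's normalized origin against it. A small illustration of the resulting matching behaviour, assuming a hypothetical REAL_TIME_ALLOWED_CORS_ORIGINS value of 'https://*.overleaf.com' (the real value is deployment-specific):

const allowedCorsOrigins = 'https://*.overleaf.com' // hypothetical example value
const allowedCorsOriginsRegex = new RegExp(
  `^${allowedCorsOrigins.replaceAll('.', '\\.').replace('://*', '://[^.]+')}(?::443)?$`
)

console.log(allowedCorsOriginsRegex.test('https://www.overleaf.com')) // true
console.log(allowedCorsOriginsRegex.test('https://www.overleaf.com:443')) // true (explicit default port allowed)
console.log(allowedCorsOriginsRegex.test('https://a.b.overleaf.com')) // false ('[^.]+' allows a single subdomain label)
console.log(allowedCorsOriginsRegex.test('https://evil.com')) // false
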
@@ -236,6 +265,7 @@ function drainAndShutdown(signal) {
}
Settings.shutDownInProgress = false
+Settings.shutDownScheduled = false
const shutdownDrainTimeWindow = parseInt(Settings.shutdownDrainTimeWindow, 10)
if (Settings.shutdownDrainTimeWindow) {
logger.info({ shutdownDrainTimeWindow }, 'shutdownDrainTimeWindow enabled')
@@ -262,7 +292,11 @@ if (Settings.shutdownDrainTimeWindow) {
'EPIPE',
'ECONNRESET',
'ERR_STREAM_WRITE_AFTER_END',
- ].includes(error.code)
+ ].includes(error.code) ||
+ // socket.io error handler sending on polling connection again.
+ (error.code === 'ERR_HTTP_HEADERS_SENT' &&
+ error.stack &&
+ error.stack.includes('Transport.error'))
) {
Metrics.inc('disconnected_write', 1, { status: error.code })
return logger.warn(
@@ -271,8 +305,16 @@ if (Settings.shutdownDrainTimeWindow) {
)
}
logger.error({ err: error }, 'uncaught exception')
- if (Settings.errors && Settings.errors.shutdownOnUncaughtError) {
- drainAndShutdown('SIGABRT')
+ if (
+ Settings.errors?.shutdownOnUncaughtError &&
+ !Settings.shutDownScheduled
+ ) {
+ Settings.shutDownScheduled = true
+ const delay = Math.ceil(
+ Math.random() * 60 * Math.max(io.sockets.clients().length, 1_000)
+ )
+ logger.info({ delay }, 'delaying shutdown on uncaught error')
+ setTimeout(() => drainAndShutdown('SIGABRT'), delay)
}
})
}
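
The delayed shutdown above spreads SIGABRT drains over a randomized window so that a fleet of instances hit by the same uncaught error does not drain simultaneously. A sketch of the delay calculation on its own (same formula as above, with the client count passed in):

function shutdownDelayMs(nConnectedClients) {
  // Random delay of up to 60ms per connected client, with a floor of 1,000
  // clients, i.e. at most 60 seconds for small instances and proportionally
  // longer for busy ones.
  return Math.ceil(Math.random() * 60 * Math.max(nConnectedClients, 1_000))
}

// shutdownDelayMs(100)   -> somewhere in 0..60,000 ms
// shutdownDelayMs(5_000) -> somewhere in 0..300,000 ms
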
diff --git a/services/real-time/app/js/ConnectedUsersManager.js b/services/real-time/app/js/ConnectedUsersManager.js
index 299a4b870a..4ce3dcdcad 100644
--- a/services/real-time/app/js/ConnectedUsersManager.js
+++ b/services/real-time/app/js/ConnectedUsersManager.js
@@ -3,6 +3,7 @@ const Settings = require('@overleaf/settings')
const logger = require('@overleaf/logger')
const redis = require('@overleaf/redis-wrapper')
const OError = require('@overleaf/o-error')
+const Metrics = require('@overleaf/metrics')
const rclient = redis.createClient(Settings.redis.realtime)
const Keys = Settings.redis.realtime.key_schema
@@ -13,7 +14,25 @@ const FOUR_DAYS_IN_S = ONE_DAY_IN_S * 4
const USER_TIMEOUT_IN_S = ONE_HOUR_IN_S / 4
const REFRESH_TIMEOUT_IN_S = 10 // only show clients which have responded to a refresh request in the last 10 seconds
+function recordProjectNotEmptySinceMetric(res, status) {
+ const diff = Date.now() / 1000 - parseInt(res, 10)
+ const BUCKETS = [
+ 0,
+ ONE_HOUR_IN_S,
+ 2 * ONE_HOUR_IN_S,
+ ONE_DAY_IN_S,
+ 2 * ONE_DAY_IN_S,
+ 7 * ONE_DAY_IN_S,
+ 30 * ONE_DAY_IN_S,
+ ]
+ Metrics.histogram('project_not_empty_since', diff, BUCKETS, { status })
+}
+
module.exports = {
+ countConnectedClients(projectId, callback) {
+ rclient.scard(Keys.clientsInProject({ project_id: projectId }), callback)
+ },
+
// Use the same method for when a user connects, and when a user sends a cursor
// update. This way we don't care if the connected_user key has expired when
// we receive a cursor update.
@@ -23,6 +42,7 @@ module.exports = {
const multi = rclient.multi()
multi.sadd(Keys.clientsInProject({ project_id: projectId }), clientId)
+ multi.scard(Keys.clientsInProject({ project_id: projectId }))
multi.expire(
Keys.clientsInProject({ project_id: projectId }),
FOUR_DAYS_IN_S
@@ -66,11 +86,17 @@ module.exports = {
USER_TIMEOUT_IN_S
)
- multi.exec(function (err) {
+ multi.exec(function (err, res) {
if (err) {
err = new OError('problem marking user as connected').withCause(err)
+ return callback(err)
}
- callback(err)
+ const [, nConnectedClients] = res
+ Metrics.inc('editing_session_mode', 1, {
+ method: cursorData ? 'update' : 'connect',
+ status: nConnectedClients === 1 ? 'single' : 'multi',
+ })
+ callback(null)
})
},
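
The SCARD added to the MULTI above is what feeds the new editing_session_mode counter: the second reply in the exec result is the number of clients now in the project set. A sketch of the label derivation (illustrative only, combining the connect/update and disconnect variants shown in this file):

function editingSessionStatus(nConnectedClients, isDisconnect = false) {
  // On connect/update the current client is already in the set, so the result
  // is 'single' or 'multi'; on disconnect, zero remaining clients is 'empty'.
  if (isDisconnect && nConnectedClients === 0) return 'empty'
  return nConnectedClients === 1 ? 'single' : 'multi'
}
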
@@ -100,6 +126,7 @@ module.exports = {
logger.debug({ projectId, clientId }, 'marking user as disconnected')
const multi = rclient.multi()
multi.srem(Keys.clientsInProject({ project_id: projectId }), clientId)
+ multi.scard(Keys.clientsInProject({ project_id: projectId }))
multi.expire(
Keys.clientsInProject({ project_id: projectId }),
FOUR_DAYS_IN_S
@@ -107,11 +134,58 @@ module.exports = {
multi.del(
Keys.connectedUser({ project_id: projectId, client_id: clientId })
)
- multi.exec(function (err) {
+ multi.exec(function (err, res) {
if (err) {
err = new OError('problem marking user as disconnected').withCause(err)
+ return callback(err)
}
- callback(err)
+ const [, nConnectedClients] = res
+ const status =
+ nConnectedClients === 0
+ ? 'empty'
+ : nConnectedClients === 1
+ ? 'single'
+ : 'multi'
+ Metrics.inc('editing_session_mode', 1, {
+ method: 'disconnect',
+ status,
+ })
+ if (status === 'empty') {
+ rclient.getdel(Keys.projectNotEmptySince({ projectId }), (err, res) => {
+ if (err) {
+ logger.warn(
+ { err, projectId },
+ 'could not collect projectNotEmptySince'
+ )
+ } else if (res) {
+ recordProjectNotEmptySinceMetric(res, status)
+ }
+ })
+ } else {
+ // Only populate projectNotEmptySince when more clients remain connected.
+ const nowInSeconds = Math.ceil(Date.now() / 1000).toString()
+ // We can go back to SET GET after upgrading to redis 7.0+
+ const multi = rclient.multi()
+ multi.get(Keys.projectNotEmptySince({ projectId }))
+ multi.set(
+ Keys.projectNotEmptySince({ projectId }),
+ nowInSeconds,
+ 'NX',
+ 'EX',
+ 31 * ONE_DAY_IN_S
+ )
+ multi.exec((err, res) => {
+ if (err) {
+ logger.warn(
+ { err, projectId },
+ 'could not get/set projectNotEmptySince'
+ )
+ } else if (res[0]) {
+ recordProjectNotEmptySinceMetric(res[0], status)
+ }
+ })
+ }
+ callback(null)
})
},
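
The projectNotEmptySince bookkeeping above stores, in whole seconds, when the project room last stopped being empty, avoids overwriting it via SET ... NX, and clears it with GETDEL once the room empties so the elapsed time can be recorded. A sketch of the elapsed-time calculation (illustrative only; it matches the '1230' / 1_234_000 fixtures in the ConnectedUsersManager unit tests further down):

function projectNotEmptySinceSeconds(storedValue, nowMs = Date.now()) {
  // storedValue is the seconds timestamp written with SET ... NX EX above
  return nowMs / 1000 - parseInt(storedValue, 10)
}

console.log(projectNotEmptySinceSeconds('1230', 1_234_000)) // 4 seconds, histogrammed into hour/day-sized buckets
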
diff --git a/services/real-time/app/js/DocumentUpdaterManager.js b/services/real-time/app/js/DocumentUpdaterManager.js
index 0a9a12c99d..51b71e8ec0 100644
--- a/services/real-time/app/js/DocumentUpdaterManager.js
+++ b/services/real-time/app/js/DocumentUpdaterManager.js
@@ -19,7 +19,7 @@ const Keys = settings.redis.documentupdater.key_schema
const DocumentUpdaterManager = {
getDocument(projectId, docId, fromVersion, callback) {
const timer = new metrics.Timer('get-document')
- const url = `${settings.apis.documentupdater.url}/project/${projectId}/doc/${docId}?fromVersion=${fromVersion}`
+ const url = `${settings.apis.documentupdater.url}/project/${projectId}/doc/${docId}?fromVersion=${fromVersion}&historyOTSupport=true`
logger.debug(
{ projectId, docId, fromVersion },
'getting doc from document updater'
@@ -48,7 +48,8 @@ const DocumentUpdaterManager = {
body.version,
body.ranges,
body.ops,
- body.ttlInS
+ body.ttlInS,
+ body.type
)
} else if (res.statusCode === 422 && body?.firstVersionInRedis) {
callback(new ClientRequestedMissingOpsError(422, body))
diff --git a/services/real-time/app/js/HttpApiController.js b/services/real-time/app/js/HttpApiController.js
index 122f1838be..5e75fe3601 100644
--- a/services/real-time/app/js/HttpApiController.js
+++ b/services/real-time/app/js/HttpApiController.js
@@ -1,8 +1,23 @@
const WebsocketLoadBalancer = require('./WebsocketLoadBalancer')
const DrainManager = require('./DrainManager')
+const ConnectedUsersManager = require('./ConnectedUsersManager')
const logger = require('@overleaf/logger')
module.exports = {
+ countConnectedClients(req, res) {
+ const { projectId } = req.params
+ ConnectedUsersManager.countConnectedClients(
+ projectId,
+ (err, nConnectedClients) => {
+ if (err) {
+ logger.err({ err, projectId }, 'count connected clients failed')
+ return res.sendStatus(500)
+ }
+ res.json({ nConnectedClients })
+ }
+ )
+ },
+
sendMessage(req, res) {
logger.debug({ message: req.params.message }, 'sending message')
if (Array.isArray(req.body)) {
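
The new countConnectedClients handler above answers with a JSON body of the form { nConnectedClients: <number> }, backed by SCARD on the clientsInProject set. A minimal caller sketch, assuming the real-time service is reachable at a hypothetical realTimeUrl (the acceptance test helper added later in this diff does the same thing):

const request = require('request')

function countConnectedClients(realTimeUrl, projectId, callback) {
  request.get(
    {
      url: `${realTimeUrl}/project/${projectId}/count-connected-clients`,
      json: true,
    },
    (error, response, body) => callback(error, body)
  )
}

// countConnectedClients('http://127.0.0.1:3026', projectId, (err, body) => {
//   if (err) throw err
//   console.log(body.nConnectedClients)
// })
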
diff --git a/services/real-time/app/js/Router.js b/services/real-time/app/js/Router.js
index 8aaad2a164..943453bc13 100644
--- a/services/real-time/app/js/Router.js
+++ b/services/real-time/app/js/Router.js
@@ -11,6 +11,8 @@ const { UnexpectedArgumentsError } = require('./Errors')
const Joi = require('joi')
const HOSTNAME = require('node:os').hostname()
+const SERVER_PING_INTERVAL = 15000
+const SERVER_PING_LATENCY_THRESHOLD = 5000
const JOI_OBJECT_ID = Joi.string()
.required()
@@ -111,6 +113,10 @@ module.exports = Router = {
bodyParser.json({ limit: '5mb' }),
HttpApiController.sendMessage
)
+ app.get(
+ '/project/:projectId/count-connected-clients',
+ HttpApiController.countConnectedClients
+ )
app.post('/drain', HttpApiController.startDrain)
app.post(
@@ -127,7 +133,10 @@ module.exports = Router = {
if (client) {
client.on('error', function (err) {
- logger.err({ clientErr: err }, 'socket.io client error')
+ logger.err(
+ { clientErr: err, publicId: client.publicId, clientId: client.id },
+ 'socket.io client error'
+ )
if (client.connected) {
client.emit('reconnectGracefully')
client.disconnect()
@@ -169,20 +178,33 @@ module.exports = Router = {
}
return
}
-
+ const useServerPing =
+ !!client.handshake?.query?.esh &&
+ !!client.handshake?.query?.ssp &&
+ // No server ping with long-polling transports.
+ client.transport === 'websocket'
+ const isDebugging = !!client.handshake?.query?.debugging
const projectId = client.handshake?.query?.projectId
- try {
- Joi.assert(projectId, JOI_OBJECT_ID)
- } catch (error) {
- metrics.inc('socket-io.connection', 1, {
- status: client.transport,
- method: projectId ? 'bad-project-id' : 'missing-project-id',
- })
- client.emit('connectionRejected', {
- message: 'missing/bad ?projectId=... query flag on handshake',
- })
- client.disconnect()
- return
+
+ if (isDebugging) {
+ client.connectedAt = Date.now()
+ client.isDebugging = true
+ }
+
+ if (!isDebugging) {
+ try {
+ Joi.assert(projectId, JOI_OBJECT_ID)
+ } catch (error) {
+ metrics.inc('socket-io.connection', 1, {
+ status: client.transport,
+ method: projectId ? 'bad-project-id' : 'missing-project-id',
+ })
+ client.emit('connectionRejected', {
+ message: 'missing/bad ?projectId=... query flag on handshake',
+ })
+ client.disconnect()
+ return
+ }
}
// The client.id is security sensitive. Generate a publicId for sending to other clients.
@@ -198,8 +220,6 @@ module.exports = Router = {
})
metrics.gauge('socket-io.clients', io.sockets.clients().length)
- logger.debug({ session, clientId: client.id }, 'client connected')
-
let user
if (session && session.passport && session.passport.user) {
;({ user } = session.passport)
@@ -210,6 +230,106 @@ module.exports = Router = {
user = { _id: 'anonymous-user', anonymousAccessToken }
}
+ const info = {
+ userId: user._id,
+ projectId,
+ transport: client.transport,
+ publicId: client.publicId,
+ clientId: client.id,
+ isDebugging,
+ }
+ if (isDebugging) {
+ logger.info(info, 'client connected')
+ } else {
+ logger.debug(info, 'client connected')
+ }
+
+ const connectionDetails = {
+ userId: user._id,
+ projectId,
+ remoteIp: client.remoteIp,
+ publicId: client.publicId,
+ clientId: client.id,
+ }
+
+ let pingTimestamp
+ let pingId = -1
+ let pongId = -1
+ const pingTimer = useServerPing
+ ? setInterval(function () {
+ if (pongId !== pingId) {
+ logger.warn(
+ {
+ ...connectionDetails,
+ pingId,
+ pongId,
+ lastPingTimestamp: pingTimestamp,
+ },
+ 'no client response to last ping'
+ )
+ }
+ pingTimestamp = Date.now()
+ client.emit(
+ 'serverPing',
+ ++pingId,
+ pingTimestamp,
+ client.transport,
+ client.id
+ )
+ }, SERVER_PING_INTERVAL)
+ : null
+ client.on(
+ 'clientPong',
+ function (
+ receivedPingId,
+ sentTimestamp,
+ serverTransport,
+ serverSessionId,
+ clientTransport,
+ clientSessionId
+ ) {
+ pongId = receivedPingId
+ const receivedTimestamp = Date.now()
+ if (
+ receivedPingId !== pingId ||
+ (serverSessionId && serverSessionId !== clientSessionId)
+ ) {
+ logger.warn(
+ {
+ ...connectionDetails,
+ receivedPingId,
+ pingId,
+ sentTimestamp,
+ receivedTimestamp,
+ latency: receivedTimestamp - sentTimestamp,
+ lastPingTimestamp: pingTimestamp,
+ serverTransport,
+ serverSessionId,
+ clientTransport,
+ clientSessionId,
+ },
+ 'received pong with wrong counter'
+ )
+ } else if (
+ receivedTimestamp - sentTimestamp >
+ SERVER_PING_LATENCY_THRESHOLD
+ ) {
+ logger.warn(
+ {
+ ...connectionDetails,
+ receivedPingId,
+ pingId,
+ sentTimestamp,
+ receivedTimestamp,
+ latency: receivedTimestamp - sentTimestamp,
+ lastPingTimestamp: pingTimestamp,
+ },
+ 'received pong with high latency'
+ )
+ }
+ }
+ )
+
if (settings.exposeHostname) {
client.on('debug.getHostname', function (callback) {
if (typeof callback !== 'function') {
@@ -222,7 +342,33 @@ module.exports = Router = {
callback(HOSTNAME)
})
}
+ client.on('debug', (data, callback) => {
+ if (typeof callback !== 'function') {
+ return Router._handleInvalidArguments(client, 'debug', arguments)
+ }
+ logger.info(
+ { publicId: client.publicId, clientId: client.id },
+ 'received debug message'
+ )
+
+ const response = {
+ serverTime: Date.now(),
+ data,
+ client: {
+ publicId: client.publicId,
+ remoteIp: client.remoteIp,
+ userAgent: client.userAgent,
+ connected: !client.disconnected,
+ connectedAt: client.connectedAt,
+ },
+ server: {
+ hostname: settings.exposeHostname ? HOSTNAME : undefined,
+ },
+ }
+
+ callback(response)
+ })
const joinProject = function (callback) {
WebsocketController.joinProject(
client,
@@ -245,6 +391,17 @@ module.exports = Router = {
metrics.inc('socket-io.disconnect', 1, { status: client.transport })
metrics.gauge('socket-io.clients', io.sockets.clients().length)
+ if (client.isDebugging) {
+ const duration = Date.now() - client.connectedAt
+ metrics.timing('socket-io.debugging.duration', duration)
+ logger.info(
+ { duration, publicId: client.publicId, clientId: client.id },
+ 'debug client disconnected'
+ )
+ } else {
+ clearInterval(pingTimer)
+ }
+
WebsocketController.leaveProject(io, client, function (err) {
if (err) {
Router._handleError(function () {}, err, client, 'leaveProject')
@@ -426,7 +583,6 @@ module.exports = Router = {
if (err) {
Router._handleError(callback, err, client, 'applyOtUpdate', {
doc_id: docId,
- update,
})
} else {
callback()
@@ -435,19 +591,21 @@ module.exports = Router = {
)
})
- joinProject((err, project, permissionsLevel, protocolVersion) => {
- if (err) {
- client.emit('connectionRejected', err)
- client.disconnect()
- return
- }
- client.emit('joinProjectResponse', {
- publicId: client.publicId,
- project,
- permissionsLevel,
- protocolVersion,
+ if (!isDebugging) {
+ joinProject((err, project, permissionsLevel, protocolVersion) => {
+ if (err) {
+ client.emit('connectionRejected', err)
+ client.disconnect()
+ return
+ }
+ client.emit('joinProjectResponse', {
+ publicId: client.publicId,
+ project,
+ permissionsLevel,
+ protocolVersion,
+ })
})
- })
+ }
})
},
}
diff --git a/services/real-time/app/js/WebsocketController.js b/services/real-time/app/js/WebsocketController.js
index dec567709a..c0f465a490 100644
--- a/services/real-time/app/js/WebsocketController.js
+++ b/services/real-time/app/js/WebsocketController.js
@@ -8,6 +8,7 @@ const ConnectedUsersManager = require('./ConnectedUsersManager')
const WebsocketLoadBalancer = require('./WebsocketLoadBalancer')
const RoomManager = require('./RoomManager')
const {
+ CodedError,
JoinLeaveEpochMismatchError,
NotAuthorizedError,
NotJoinedError,
@@ -283,7 +284,7 @@ module.exports = WebsocketController = {
projectId,
docId,
fromVersion,
- function (error, lines, version, ranges, ops, ttlInS) {
+ function (error, lines, version, ranges, ops, ttlInS, type) {
if (error) {
if (error instanceof ClientRequestedMissingOpsError) {
emitJoinDocCatchUpMetrics('missing', error.info)
@@ -307,36 +308,53 @@ module.exports = WebsocketController = {
// See http://ecmanaut.blogspot.co.uk/2006/07/encoding-decoding-utf8-in-javascript.html
const encodeForWebsockets = text =>
unescape(encodeURIComponent(text))
- const escapedLines = []
- for (let line of lines) {
- try {
- line = encodeForWebsockets(line)
- } catch (err) {
- OError.tag(err, 'error encoding line uri component', { line })
- return callback(err)
+ metrics.inc('client_supports_history_v1_ot', 1, {
+ status: options.supportsHistoryOT ? 'success' : 'failure',
+ })
+ let escapedLines
+ if (type === 'history-ot') {
+ if (!options.supportsHistoryOT) {
+ RoomManager.leaveDoc(client, docId)
+ // TODO(24596): ask the user to reload the editor page (via out-of-sync modal when there are pending ops).
+ return callback(
+ new CodedError('client does not support history-ot')
+ )
}
- escapedLines.push(line)
- }
- if (options.encodeRanges) {
- try {
- for (const comment of (ranges && ranges.comments) || []) {
- if (comment.op.c) {
- comment.op.c = encodeForWebsockets(comment.op.c)
- }
+ escapedLines = lines
+ } else {
+ escapedLines = []
+ for (let line of lines) {
+ try {
+ line = encodeForWebsockets(line)
+ } catch (err) {
+ OError.tag(err, 'error encoding line uri component', {
+ line,
+ })
+ return callback(err)
}
- for (const change of (ranges && ranges.changes) || []) {
- if (change.op.i) {
- change.op.i = encodeForWebsockets(change.op.i)
+ escapedLines.push(line)
+ }
+ if (options.encodeRanges) {
+ try {
+ for (const comment of (ranges && ranges.comments) || []) {
+ if (comment.op.c) {
+ comment.op.c = encodeForWebsockets(comment.op.c)
+ }
}
- if (change.op.d) {
- change.op.d = encodeForWebsockets(change.op.d)
+ for (const change of (ranges && ranges.changes) || []) {
+ if (change.op.i) {
+ change.op.i = encodeForWebsockets(change.op.i)
+ }
+ if (change.op.d) {
+ change.op.d = encodeForWebsockets(change.op.d)
+ }
}
+ } catch (err) {
+ OError.tag(err, 'error encoding range uri component', {
+ ranges,
+ })
+ return callback(err)
}
- } catch (err) {
- OError.tag(err, 'error encoding range uri component', {
- ranges,
- })
- return callback(err)
}
}
@@ -351,7 +369,7 @@ module.exports = WebsocketController = {
},
'client joined doc'
)
- callback(null, escapedLines, version, ops, ranges)
+ callback(null, escapedLines, version, ops, ranges, type)
}
)
})
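
With the change above, joinDoc now reports the document's OT type as a final callback argument and rejects clients that did not opt in via supportsHistoryOT when the doc is history-ot. A rough client-side sketch of opting in (illustrative only; the argument order matches the acceptance tests later in this diff):

function joinDocWithHistoryOTSupport(socket, docId, callback) {
  socket.emit(
    'joinDoc',
    docId,
    { supportsHistoryOT: true },
    (error, lines, version, ops, ranges, type) => {
      // Without the flag, a history-ot doc yields the error
      // 'client does not support history-ot' instead of a payload.
      if (error) return callback(error)
      // type is 'history-ot' or 'sharejs-text-ot'; history-ot lines are passed
      // through without the URI-component encoding applied to sharejs docs.
      callback(null, { lines, version, ops, ranges, type })
    }
  )
}
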
diff --git a/services/real-time/buildscript.txt b/services/real-time/buildscript.txt
index 4fe318268f..1eb77d9f6b 100644
--- a/services/real-time/buildscript.txt
+++ b/services/real-time/buildscript.txt
@@ -4,6 +4,6 @@ real-time
--env-add=
--env-pass-through=
--esmock-loader=False
---node-version=20.18.0
+--node-version=22.17.0
--public-repo=False
---script-version=4.5.0
+--script-version=4.7.0
diff --git a/services/real-time/config/settings.defaults.js b/services/real-time/config/settings.defaults.js
index 1cc0c0f107..57b0a50a42 100644
--- a/services/real-time/config/settings.defaults.js
+++ b/services/real-time/config/settings.defaults.js
@@ -38,6 +38,9 @@ const settings = {
connectedUser({ project_id, client_id }) {
return `connected_user:{${project_id}}:${client_id}`
},
+ projectNotEmptySince({ projectId }) {
+ return `projectNotEmptySince:{${projectId}}`
+ },
},
maxRetriesPerRequest: parseInt(
process.env.REAL_TIME_REDIS_MAX_RETRIES_PER_REQUEST ||
@@ -170,6 +173,7 @@ const settings = {
behindProxy: process.env.BEHIND_PROXY === 'true',
trustedProxyIps: process.env.TRUSTED_PROXY_IPS,
keepAliveTimeoutMs: parseInt(process.env.KEEPALIVE_TIMEOUT_MS ?? '5000', 10),
+ allowedCorsOrigins: process.env.REAL_TIME_ALLOWED_CORS_ORIGINS,
}
// console.log settings.redis
diff --git a/services/real-time/docker-compose.ci.yml b/services/real-time/docker-compose.ci.yml
index ca9813adad..a5a2292e72 100644
--- a/services/real-time/docker-compose.ci.yml
+++ b/services/real-time/docker-compose.ci.yml
@@ -21,6 +21,7 @@ services:
ELASTIC_SEARCH_DSN: es:9200
REDIS_HOST: redis
QUEUES_REDIS_HOST: redis
+ HISTORY_REDIS_HOST: redis
ANALYTICS_QUEUES_REDIS_HOST: redis
MONGO_HOST: mongo
POSTGRES_HOST: postgres
@@ -42,7 +43,7 @@ services:
command: tar -czf /tmp/build/build.tar.gz --exclude=build.tar.gz --exclude-vcs .
user: root
redis:
- image: redis
+ image: redis:7.4.3
healthcheck:
test: ping="$$(redis-cli ping)" && [ "$$ping" = 'PONG' ]
interval: 1s
diff --git a/services/real-time/docker-compose.yml b/services/real-time/docker-compose.yml
index cb761bb315..a4f019f6be 100644
--- a/services/real-time/docker-compose.yml
+++ b/services/real-time/docker-compose.yml
@@ -6,7 +6,7 @@ version: "2.3"
services:
test_unit:
- image: node:20.18.0
+ image: node:22.17.0
volumes:
- .:/overleaf/services/real-time
- ../../node_modules:/overleaf/node_modules
@@ -14,13 +14,14 @@ services:
working_dir: /overleaf/services/real-time
environment:
MOCHA_GREP: ${MOCHA_GREP}
+ LOG_LEVEL: ${LOG_LEVEL:-}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
command: npm run --silent test:unit
user: node
test_acceptance:
- image: node:20.18.0
+ image: node:22.17.0
volumes:
- .:/overleaf/services/real-time
- ../../node_modules:/overleaf/node_modules
@@ -29,12 +30,13 @@ services:
environment:
ELASTIC_SEARCH_DSN: es:9200
REDIS_HOST: redis
+ HISTORY_REDIS_HOST: redis
QUEUES_REDIS_HOST: redis
ANALYTICS_QUEUES_REDIS_HOST: redis
MONGO_HOST: mongo
POSTGRES_HOST: postgres
MOCHA_GREP: ${MOCHA_GREP}
- LOG_LEVEL: ERROR
+ LOG_LEVEL: ${LOG_LEVEL:-}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
user: node
@@ -44,7 +46,7 @@ services:
command: npm run --silent test:acceptance
redis:
- image: redis
+ image: redis:7.4.3
healthcheck:
test: ping=$$(redis-cli ping) && [ "$$ping" = 'PONG' ]
interval: 1s
diff --git a/services/real-time/package.json b/services/real-time/package.json
index 69c21f8351..a52e0dfcf9 100644
--- a/services/real-time/package.json
+++ b/services/real-time/package.json
@@ -28,20 +28,20 @@
"bunyan": "^1.8.15",
"connect-redis": "^6.1.3",
"cookie-parser": "^1.4.6",
- "express": "^4.21.0",
+ "express": "^4.21.2",
"express-session": "^1.17.1",
"joi": "^17.12.0",
"lodash": "^4.17.21",
"proxy-addr": "^2.0.7",
"request": "^2.88.2",
- "socket.io": "github:overleaf/socket.io#0.9.19-overleaf-10",
+ "socket.io": "github:overleaf/socket.io#0.9.19-overleaf-12",
"socket.io-client": "github:overleaf/socket.io-client#0.9.17-overleaf-5"
},
"devDependencies": {
"chai": "^4.3.6",
"chai-as-promised": "^7.1.1",
"cookie-signature": "^1.1.0",
- "mocha": "^10.2.0",
+ "mocha": "^11.1.0",
"sandboxed-module": "~0.3.0",
"sinon": "^9.2.4",
"sinon-chai": "^3.7.0",
diff --git a/services/real-time/test/acceptance/js/ClientTrackingTests.js b/services/real-time/test/acceptance/js/ClientTrackingTests.js
index 415e9ad662..d4b484c0a8 100644
--- a/services/real-time/test/acceptance/js/ClientTrackingTests.js
+++ b/services/real-time/test/acceptance/js/ClientTrackingTests.js
@@ -19,6 +19,80 @@ const FixturesManager = require('./helpers/FixturesManager')
const async = require('async')
describe('clientTracking', function () {
+ describe('when another logged in user joins a project', function () {
+ before(function (done) {
+ return async.series(
+ [
+ cb => {
+ return FixturesManager.setUpProject(
+ {
+ privilegeLevel: 'owner',
+ project: { name: 'Test Project' },
+ },
+ (error, { user_id: userId, project_id: projectId }) => {
+ if (error) return done(error)
+ this.user_id = userId
+ this.project_id = projectId
+ return cb()
+ }
+ )
+ },
+
+ cb => {
+ return FixturesManager.setUpDoc(
+ this.project_id,
+ { lines: this.lines, version: this.version, ops: this.ops },
+ (e, { doc_id: docId }) => {
+ this.doc_id = docId
+ return cb(e)
+ }
+ )
+ },
+
+ cb => {
+ this.clientA = RealTimeClient.connect(this.project_id, cb)
+ },
+
+ cb => {
+ RealTimeClient.countConnectedClients(
+ this.project_id,
+ (err, body) => {
+ if (err) return cb(err)
+ expect(body).to.deep.equal({ nConnectedClients: 1 })
+ cb()
+ }
+ )
+ },
+
+ cb => {
+ this.clientB = RealTimeClient.connect(this.project_id, cb)
+ },
+ ],
+ done
+ )
+ })
+
+ it('should record the initial state in getConnectedUsers', function (done) {
+ this.clientA.emit('clientTracking.getConnectedUsers', (error, users) => {
+ if (error) return done(error)
+ for (const user of Array.from(users)) {
+ if (user.client_id === this.clientB.publicId) {
+ expect(user.cursorData).to.not.exist
+ return done()
+ }
+ }
+ throw new Error('other user was never found')
+ })
+ })
+ it('should list both clients via HTTP', function (done) {
+ RealTimeClient.countConnectedClients(this.project_id, (err, body) => {
+ if (err) return done(err)
+ expect(body).to.deep.equal({ nConnectedClients: 2 })
+ done()
+ })
+ })
+ })
+
describe('when a client updates its cursor location', function () {
before(function (done) {
return async.series(
diff --git a/services/real-time/test/acceptance/js/JoinDocTests.js b/services/real-time/test/acceptance/js/JoinDocTests.js
index 547691d358..3381526c59 100644
--- a/services/real-time/test/acceptance/js/JoinDocTests.js
+++ b/services/real-time/test/acceptance/js/JoinDocTests.js
@@ -89,6 +89,7 @@ describe('joinDoc', function () {
this.version,
this.ops,
this.ranges,
+ 'sharejs-text-ot',
])
})
@@ -168,6 +169,7 @@ describe('joinDoc', function () {
this.version,
this.ops,
this.ranges,
+ 'sharejs-text-ot',
])
})
@@ -247,6 +249,7 @@ describe('joinDoc', function () {
this.version,
this.ops,
this.ranges,
+ 'sharejs-text-ot',
])
})
@@ -408,6 +411,7 @@ describe('joinDoc', function () {
this.version,
this.ops,
this.ranges,
+ 'sharejs-text-ot',
])
})
@@ -489,6 +493,7 @@ describe('joinDoc', function () {
this.version,
this.ops,
this.ranges,
+ 'sharejs-text-ot',
])
})
@@ -504,7 +509,7 @@ describe('joinDoc', function () {
})
})
- return describe('with fromVersion and options', function () {
+ describe('with fromVersion and options', function () {
before(function (done) {
this.fromVersion = 36
this.options = { encodeRanges: true }
@@ -572,6 +577,7 @@ describe('joinDoc', function () {
this.version,
this.ops,
this.ranges,
+ 'sharejs-text-ot',
])
})
@@ -586,4 +592,139 @@ describe('joinDoc', function () {
)
})
})
+
+ describe('with type=history-ot', function () {
+ before(function (done) {
+ async.series(
+ [
+ cb => {
+ FixturesManager.setUpProject(
+ { privilegeLevel: 'owner' },
+ (e, { project_id: projectId, user_id: userId }) => {
+ this.project_id = projectId
+ this.user_id = userId
+ cb(e)
+ }
+ )
+ },
+
+ cb => {
+ FixturesManager.setUpDoc(
+ this.project_id,
+ {
+ lines: this.lines,
+ version: this.version,
+ ops: this.ops,
+ ranges: this.ranges,
+ type: 'history-ot',
+ },
+ (e, { doc_id: docId }) => {
+ this.doc_id = docId
+ cb(e)
+ }
+ )
+ },
+ ],
+ done
+ )
+ })
+
+ describe('when support is indicated', function () {
+ before(function (done) {
+ MockDocUpdaterServer.getDocument.resetHistory()
+ async.series(
+ [
+ cb => {
+ this.client = RealTimeClient.connect(this.project_id, cb)
+ },
+ cb =>
+ this.client.emit(
+ 'joinDoc',
+ this.doc_id,
+ { supportsHistoryOT: true },
+ (error, ...rest) => {
+ ;[...this.returnedArgs] = Array.from(rest)
+ cb(error)
+ }
+ ),
+ ],
+ done
+ )
+ })
+
+ it('should get the doc from the doc updater', function () {
+ MockDocUpdaterServer.getDocument
+ .calledWith(this.project_id, this.doc_id, -1)
+ .should.equal(true)
+ })
+
+ it('should return the doc lines, version, ranges and ops', function () {
+ this.returnedArgs.should.deep.equal([
+ this.lines,
+ this.version,
+ this.ops,
+ this.ranges,
+ 'history-ot',
+ ])
+ })
+
+ it('should have joined the doc room', function (done) {
+ RealTimeClient.getConnectedClient(
+ this.client.socket.sessionid,
+ (error, client) => {
+ if (error) return done(error)
+ expect(client.rooms).to.deep.equal([this.project_id, this.doc_id])
+ done()
+ }
+ )
+ })
+ })
+
+ describe('when support is not indicated', function () {
+ before(function (done) {
+ MockDocUpdaterServer.getDocument.resetHistory()
+ async.series(
+ [
+ cb => {
+ this.client = RealTimeClient.connect(this.project_id, cb)
+ },
+ cb =>
+ this.client.emit('joinDoc', this.doc_id, (error, ...rest) => {
+ this.error = error
+ ;[...this.returnedArgs] = Array.from(rest)
+ cb()
+ }),
+ ],
+ done
+ )
+ })
+
+ it('should get the doc from the doc updater', function () {
+ MockDocUpdaterServer.getDocument
+ .calledWith(this.project_id, this.doc_id, -1)
+ .should.equal(true)
+ })
+
+ it('should return an error', function () {
+ expect(this.error).to.deep.equal({
+ message: 'client does not support history-ot',
+ })
+ })
+
+ it('should not return the doc lines, version, ranges and ops', function () {
+ this.returnedArgs.should.deep.equal([])
+ })
+
+ it('should leave the doc room again', function (done) {
+ RealTimeClient.getConnectedClient(
+ this.client.socket.sessionid,
+ (error, client) => {
+ if (error) return done(error)
+ expect(client.rooms).to.deep.equal([this.project_id])
+ done()
+ }
+ )
+ })
+ })
+ })
})
diff --git a/services/real-time/test/acceptance/js/helpers/FixturesManager.js b/services/real-time/test/acceptance/js/helpers/FixturesManager.js
index 1db0c684c1..66e3072532 100644
--- a/services/real-time/test/acceptance/js/helpers/FixturesManager.js
+++ b/services/real-time/test/acceptance/js/helpers/FixturesManager.js
@@ -108,13 +108,17 @@ module.exports = FixturesManager = {
if (!options.ops) {
options.ops = ['mock', 'ops']
}
- const { doc_id: docId, lines, version, ops, ranges } = options
+ if (!options.type) {
+ options.type = 'sharejs-text-ot'
+ }
+ const { doc_id: docId, lines, version, ops, ranges, type } = options
MockDocUpdaterServer.createMockDoc(projectId, docId, {
lines,
version,
ops,
ranges,
+ type,
})
return MockDocUpdaterServer.run(error => {
if (error != null) {
diff --git a/services/real-time/test/acceptance/js/helpers/RealTimeClient.js b/services/real-time/test/acceptance/js/helpers/RealTimeClient.js
index 7b53f5d5c4..6cc7001896 100644
--- a/services/real-time/test/acceptance/js/helpers/RealTimeClient.js
+++ b/services/real-time/test/acceptance/js/helpers/RealTimeClient.js
@@ -123,6 +123,16 @@ module.exports = Client = {
)
},
+ countConnectedClients(projectId, callback) {
+ request.get(
+ {
+ url: `http://127.0.0.1:3026/project/${projectId}/count-connected-clients`,
+ json: true,
+ },
+ (error, response, data) => callback(error, data)
+ )
+ },
+
getConnectedClient(clientId, callback) {
if (callback == null) {
callback = function () {}
diff --git a/services/real-time/test/unit/js/ConnectedUsersManagerTests.js b/services/real-time/test/unit/js/ConnectedUsersManagerTests.js
index 9026d0bb42..a6864075e0 100644
--- a/services/real-time/test/unit/js/ConnectedUsersManagerTests.js
+++ b/services/real-time/test/unit/js/ConnectedUsersManagerTests.js
@@ -20,6 +20,7 @@ const tk = require('timekeeper')
describe('ConnectedUsersManager', function () {
beforeEach(function () {
+ tk.freeze(new Date())
this.settings = {
redis: {
realtime: {
@@ -30,12 +31,18 @@ describe('ConnectedUsersManager', function () {
connectedUser({ project_id: projectId, client_id: clientId }) {
return `connected_user:${projectId}:${clientId}`
},
+ projectNotEmptySince({ projectId }) {
+ return `projectNotEmptySince:{${projectId}}`
+ },
},
},
},
}
this.rClient = {
auth() {},
+ getdel: sinon.stub(),
+ scard: sinon.stub(),
+ set: sinon.stub(),
setex: sinon.stub(),
sadd: sinon.stub(),
get: sinon.stub(),
@@ -50,11 +57,15 @@ describe('ConnectedUsersManager', function () {
return this.rClient
},
}
- tk.freeze(new Date())
+ this.Metrics = {
+ inc: sinon.stub(),
+ histogram: sinon.stub(),
+ }
this.ConnectedUsersManager = SandboxedModule.require(modulePath, {
requires: {
'@overleaf/settings': this.settings,
+ '@overleaf/metrics': this.Metrics,
'@overleaf/redis-wrapper': {
createClient: () => {
return this.rClient
@@ -83,7 +94,7 @@ describe('ConnectedUsersManager', function () {
describe('updateUserPosition', function () {
beforeEach(function () {
- return this.rClient.exec.callsArgWith(0)
+ this.rClient.exec.yields(null, [1, 1])
})
it('should set a key with the date and give it a ttl', function (done) {
@@ -240,7 +251,7 @@ describe('ConnectedUsersManager', function () {
)
})
- return it('should set the cursor position when provided', function (done) {
+ it('should set the cursor position when provided', function (done) {
return this.ConnectedUsersManager.updateUserPosition(
this.project_id,
this.client_id,
@@ -259,11 +270,72 @@ describe('ConnectedUsersManager', function () {
}
)
})
+
+ describe('editing_session_mode', function () {
+ const cases = {
+ 'should bump the metric when connecting to empty room': {
+ nConnectedClients: 1,
+ cursorData: null,
+ labels: {
+ method: 'connect',
+ status: 'single',
+ },
+ },
+ 'should bump the metric when connecting to non-empty room': {
+ nConnectedClients: 2,
+ cursorData: null,
+ labels: {
+ method: 'connect',
+ status: 'multi',
+ },
+ },
+ 'should bump the metric when updating in empty room': {
+ nConnectedClients: 1,
+ cursorData: { row: 42 },
+ labels: {
+ method: 'update',
+ status: 'single',
+ },
+ },
+ 'should bump the metric when updating in non-empty room': {
+ nConnectedClients: 2,
+ cursorData: { row: 42 },
+ labels: {
+ method: 'update',
+ status: 'multi',
+ },
+ },
+ }
+
+ for (const [
+ name,
+ { nConnectedClients, cursorData, labels },
+ ] of Object.entries(cases)) {
+ it(name, function (done) {
+ this.rClient.exec.yields(null, [1, nConnectedClients])
+ this.ConnectedUsersManager.updateUserPosition(
+ this.project_id,
+ this.client_id,
+ this.user,
+ cursorData,
+ err => {
+ if (err) return done(err)
+ expect(this.Metrics.inc).to.have.been.calledWith(
+ 'editing_session_mode',
+ 1,
+ labels
+ )
+ done()
+ }
+ )
+ })
+ }
+ })
})
describe('markUserAsDisconnected', function () {
beforeEach(function () {
- return this.rClient.exec.callsArgWith(0)
+ this.rClient.exec.yields(null, [1, 0])
})
it('should remove the user from the set', function (done) {
@@ -294,7 +366,7 @@ describe('ConnectedUsersManager', function () {
)
})
- return it('should add a ttl to the connected user set so it stays clean', function (done) {
+ it('should add a ttl to the connected user set so it stays clean', function (done) {
return this.ConnectedUsersManager.markUserAsDisconnected(
this.project_id,
this.client_id,
@@ -310,6 +382,161 @@ describe('ConnectedUsersManager', function () {
}
)
})
+
+ describe('editing_session_mode', function () {
+ const cases = {
+ 'should bump the metric when disconnecting from now empty room': {
+ nConnectedClients: 0,
+ labels: {
+ method: 'disconnect',
+ status: 'empty',
+ },
+ },
+ 'should bump the metric when disconnecting from now single room': {
+ nConnectedClients: 1,
+ labels: {
+ method: 'disconnect',
+ status: 'single',
+ },
+ },
+ 'should bump the metric when disconnecting from now multi room': {
+ nConnectedClients: 2,
+ labels: {
+ method: 'disconnect',
+ status: 'multi',
+ },
+ },
+ }
+
+ for (const [name, { nConnectedClients, labels }] of Object.entries(
+ cases
+ )) {
+ it(name, function (done) {
+ this.rClient.exec.yields(null, [1, nConnectedClients])
+ this.ConnectedUsersManager.markUserAsDisconnected(
+ this.project_id,
+ this.client_id,
+ err => {
+ if (err) return done(err)
+ expect(this.Metrics.inc).to.have.been.calledWith(
+ 'editing_session_mode',
+ 1,
+ labels
+ )
+ done()
+ }
+ )
+ })
+ }
+ })
+
+ describe('projectNotEmptySince', function () {
+ it('should clear the projectNotEmptySince key when empty and skip metric if not set', function (done) {
+ this.rClient.exec.yields(null, [1, 0])
+ this.rClient.getdel.yields(null, '')
+ this.ConnectedUsersManager.markUserAsDisconnected(
+ this.project_id,
+ this.client_id,
+ err => {
+ if (err) return done(err)
+ expect(this.rClient.getdel).to.have.been.calledWith(
+ `projectNotEmptySince:{${this.project_id}}`
+ )
+ expect(this.Metrics.histogram).to.not.have.been.called
+ done()
+ }
+ )
+ })
+ it('should clear the projectNotEmptySince key when empty and record metric if set', function (done) {
+ this.rClient.exec.onFirstCall().yields(null, [1, 0])
+ tk.freeze(1_234_000)
+ this.rClient.getdel.yields(null, '1230')
+ this.ConnectedUsersManager.markUserAsDisconnected(
+ this.project_id,
+ this.client_id,
+ err => {
+ if (err) return done(err)
+ expect(this.rClient.getdel).to.have.been.calledWith(
+ `projectNotEmptySince:{${this.project_id}}`
+ )
+ expect(this.Metrics.histogram).to.have.been.calledWith(
+ 'project_not_empty_since',
+ 4,
+ sinon.match.any,
+ { status: 'empty' }
+ )
+ done()
+ }
+ )
+ })
+ it('should set projectNotEmptySince key when single and skip metric if not set before', function (done) {
+ this.rClient.exec.onFirstCall().yields(null, [1, 1])
+ tk.freeze(1_233_001) // should ceil up
+ this.rClient.exec.onSecondCall().yields(null, [''])
+ this.ConnectedUsersManager.markUserAsDisconnected(
+ this.project_id,
+ this.client_id,
+ err => {
+ if (err) return done(err)
+ expect(this.rClient.set).to.have.been.calledWith(
+ `projectNotEmptySince:{${this.project_id}}`,
+ '1234',
+ 'NX',
+ 'EX',
+ 31 * 24 * 60 * 60
+ )
+ expect(this.Metrics.histogram).to.not.have.been.called
+ done()
+ }
+ )
+ })
+ const cases = {
+ 'should set projectNotEmptySince key when single and record metric if set before':
+ {
+ nConnectedClients: 1,
+ labels: {
+ status: 'single',
+ },
+ },
+ 'should set projectNotEmptySince key when multi and record metric if set before':
+ {
+ nConnectedClients: 2,
+ labels: {
+ status: 'multi',
+ },
+ },
+ }
+ for (const [name, { nConnectedClients, labels }] of Object.entries(
+ cases
+ )) {
+ it(name, function (done) {
+ this.rClient.exec.onFirstCall().yields(null, [1, nConnectedClients])
+ tk.freeze(1_235_000)
+ this.rClient.exec.onSecondCall().yields(null, ['1230'])
+ this.ConnectedUsersManager.markUserAsDisconnected(
+ this.project_id,
+ this.client_id,
+ err => {
+ if (err) return done(err)
+ expect(this.rClient.set).to.have.been.calledWith(
+ `projectNotEmptySince:{${this.project_id}}`,
+ '1235',
+ 'NX',
+ 'EX',
+ 31 * 24 * 60 * 60
+ )
+ expect(this.Metrics.histogram).to.have.been.calledWith(
+ 'project_not_empty_since',
+ 5,
+ sinon.match.any,
+ labels
+ )
+ done()
+ }
+ )
+ })
+ }
+ })
})
describe('_getConnectedUser', function () {
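The assertions above pin down behaviour rather than implementation: every call bumps the `editing_session_mode` counter once, with a `method` label (`connect` when no cursor data is supplied, `update` when it is, `disconnect` on leave) and a `status` label derived from how many clients remain in the room (`empty`, `single`, `multi`). On top of that, `markUserAsDisconnected` maintains a `projectNotEmptySince:{project_id}` Redis key: it is set with `NX` and a 31-day TTL while the room is occupied, read-and-deleted via `GETDEL` once the room empties, and its age feeds the `project_not_empty_since` histogram in whole seconds. A rough sketch of that logic, reconstructed only from these test expectations (the helper name, the second multi/exec round trip, the `GET` flag on `SET` and the histogram buckets are assumptions, not the actual ConnectedUsersManager code):

    // Sketch reconstructed from the unit tests above, not the real module.
    const NOT_EMPTY_TTL = 31 * 24 * 60 * 60 // 31 days, as asserted by the tests
    const OCCUPANCY_BUCKETS = [60, 3600, 86400] // illustrative; the tests accept any buckets

    function trackRoomOccupancy(rclient, Metrics, projectId, method, nConnectedClients, callback) {
      const status =
        nConnectedClients === 0 ? 'empty' : nConnectedClients === 1 ? 'single' : 'multi'
      Metrics.inc('editing_session_mode', 1, { method, status })

      const key = `projectNotEmptySince:{${projectId}}`
      const now = Math.ceil(Date.now() / 1000) // the tests freeze the clock and expect ceil()

      const recordAge = since => {
        if (since) {
          Metrics.histogram('project_not_empty_since', now - Number(since), OCCUPANCY_BUCKETS, { status })
        }
        callback()
      }

      if (nConnectedClients === 0) {
        // Room just emptied: drop the marker and, if it was set, record how long
        // the project had been continuously non-empty.
        rclient.getdel(key, (err, since) => {
          if (err) return callback(err)
          recordAge(since)
        })
      } else {
        // Room still occupied: keep the oldest timestamp (NX) and read the previous
        // value back so the histogram can also be updated on later calls.
        const multi = rclient.multi()
        multi.set(key, now.toString(), 'NX', 'EX', NOT_EMPTY_TTL, 'GET')
        multi.exec((err, results) => {
          if (err) return callback(err)
          recordAge(results && results[0])
        })
      }
    }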
diff --git a/services/real-time/test/unit/js/DocumentUpdaterManagerTests.js b/services/real-time/test/unit/js/DocumentUpdaterManagerTests.js
index 6dea5401f0..ecf45cd452 100644
--- a/services/real-time/test/unit/js/DocumentUpdaterManagerTests.js
+++ b/services/real-time/test/unit/js/DocumentUpdaterManagerTests.js
@@ -79,7 +79,7 @@ describe('DocumentUpdaterManager', function () {
})
it('should get the document from the document updater', function () {
- const url = `${this.settings.apis.documentupdater.url}/project/${this.project_id}/doc/${this.doc_id}?fromVersion=${this.fromVersion}`
+ const url = `${this.settings.apis.documentupdater.url}/project/${this.project_id}/doc/${this.doc_id}?fromVersion=${this.fromVersion}&historyOTSupport=true`
return this.request.get.calledWith(url).should.equal(true)
})
diff --git a/services/references/.eslintrc b/services/references/.eslintrc
new file mode 100644
index 0000000000..cc68024d9d
--- /dev/null
+++ b/services/references/.eslintrc
@@ -0,0 +1,6 @@
+{
+ "parserOptions": {
+ "ecmaVersion": 2022,
+ "sourceType": "module"
+ }
+}
diff --git a/services/contacts/.gitignore b/services/references/.gitignore
similarity index 100%
rename from services/contacts/.gitignore
rename to services/references/.gitignore
diff --git a/services/references/.mocharc.json b/services/references/.mocharc.json
new file mode 100644
index 0000000000..dc3280aa96
--- /dev/null
+++ b/services/references/.mocharc.json
@@ -0,0 +1,3 @@
+{
+ "require": "test/setup.js"
+}
diff --git a/services/references/.nvmrc b/services/references/.nvmrc
new file mode 100644
index 0000000000..0254b1e633
--- /dev/null
+++ b/services/references/.nvmrc
@@ -0,0 +1 @@
+20.18.2
diff --git a/services/references/Dockerfile b/services/references/Dockerfile
new file mode 100644
index 0000000000..caa6e2a31c
--- /dev/null
+++ b/services/references/Dockerfile
@@ -0,0 +1,27 @@
+# This file was auto-generated, do not edit it directly.
+# Instead run bin/update_build_scripts from
+# https://github.com/overleaf/internal/
+
+FROM node:20.18.2 AS base
+
+WORKDIR /overleaf/services/references
+
+# Google Cloud Storage needs a writable $HOME/.config for resumable uploads
+# (see https://googleapis.dev/nodejs/storage/latest/File.html#createWriteStream)
+RUN mkdir /home/node/.config && chown node:node /home/node/.config
+
+FROM base AS app
+
+COPY package.json package-lock.json /overleaf/
+COPY services/references/package.json /overleaf/services/references/
+COPY libraries/ /overleaf/libraries/
+COPY patches/ /overleaf/patches/
+
+RUN cd /overleaf && npm ci --quiet
+
+COPY services/references/ /overleaf/services/references/
+
+FROM app
+USER node
+
+CMD ["node", "--expose-gc", "app.js"]
diff --git a/services/references/LICENSE b/services/references/LICENSE
new file mode 100644
index 0000000000..ac8619dcb9
--- /dev/null
+++ b/services/references/LICENSE
@@ -0,0 +1,662 @@
+
+ GNU AFFERO GENERAL PUBLIC LICENSE
+ Version 3, 19 November 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU Affero General Public License is a free, copyleft license for
+software and other kinds of works, specifically designed to ensure
+cooperation with the community in the case of network server software.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+our General Public Licenses are intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ Developers that use our General Public Licenses protect your rights
+with two steps: (1) assert copyright on the software, and (2) offer
+you this License which gives you legal permission to copy, distribute
+and/or modify the software.
+
+ A secondary benefit of defending all users' freedom is that
+improvements made in alternate versions of the program, if they
+receive widespread use, become available for other developers to
+incorporate. Many developers of free software are heartened and
+encouraged by the resulting cooperation. However, in the case of
+software used on network servers, this result may fail to come about.
+The GNU General Public License permits making a modified version and
+letting the public access it on a server without ever releasing its
+source code to the public.
+
+ The GNU Affero General Public License is designed specifically to
+ensure that, in such cases, the modified source code becomes available
+to the community. It requires the operator of a network server to
+provide the source code of the modified version running there to the
+users of that server. Therefore, public use of a modified version, on
+a publicly accessible server, gives the public access to the source
+code of the modified version.
+
+ An older license, called the Affero General Public License and
+published by Affero, was designed to accomplish similar goals. This is
+a different license, not a version of the Affero GPL, but Affero has
+released a new version of the Affero GPL which permits relicensing under
+this license.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU Affero General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Remote Network Interaction; Use with the GNU General Public License.
+
+ Notwithstanding any other provision of this License, if you modify the
+Program, your modified version must prominently offer all users
+interacting with it remotely through a computer network (if your version
+supports such interaction) an opportunity to receive the Corresponding
+Source of your version by providing access to the Corresponding Source
+from a network server at no charge, through some standard or customary
+means of facilitating copying of software. This Corresponding Source
+shall include the Corresponding Source for any work covered by version 3
+of the GNU General Public License that is incorporated pursuant to the
+following paragraph.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the work with which it is combined will remain governed by version
+3 of the GNU General Public License.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU Affero General Public License from time to time. Such new versions
+will be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU Affero General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU Affero General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU Affero General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+
+ Copyright (C)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see .
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If your software can interact with users remotely through a computer
+network, you should also make sure that it provides a way for users to
+get its source. For example, if your program is a web application, its
+interface could display a "Source" link that leads users to an archive
+of the code. There are many ways you could offer source, and different
+solutions will be better for different programs; see section 13 for the
+specific requirements.
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU AGPL, see
+ .
diff --git a/services/references/Makefile b/services/references/Makefile
new file mode 100644
index 0000000000..e5181b46f3
--- /dev/null
+++ b/services/references/Makefile
@@ -0,0 +1,156 @@
+# This file was auto-generated, do not edit it directly.
+# Instead run bin/update_build_scripts from
+# https://github.com/overleaf/internal/
+
+BUILD_NUMBER ?= local
+BRANCH_NAME ?= $(shell git rev-parse --abbrev-ref HEAD)
+PROJECT_NAME = references
+BUILD_DIR_NAME = $(shell pwd | xargs basename | tr -cd '[a-zA-Z0-9_.\-]')
+
+DOCKER_COMPOSE_FLAGS ?= -f docker-compose.yml
+DOCKER_COMPOSE := BUILD_NUMBER=$(BUILD_NUMBER) \
+ BRANCH_NAME=$(BRANCH_NAME) \
+ PROJECT_NAME=$(PROJECT_NAME) \
+ MOCHA_GREP=${MOCHA_GREP} \
+ docker compose ${DOCKER_COMPOSE_FLAGS}
+
+COMPOSE_PROJECT_NAME_TEST_ACCEPTANCE ?= test_acceptance_$(BUILD_DIR_NAME)
+DOCKER_COMPOSE_TEST_ACCEPTANCE = \
+ COMPOSE_PROJECT_NAME=$(COMPOSE_PROJECT_NAME_TEST_ACCEPTANCE) $(DOCKER_COMPOSE)
+
+COMPOSE_PROJECT_NAME_TEST_UNIT ?= test_unit_$(BUILD_DIR_NAME)
+DOCKER_COMPOSE_TEST_UNIT = \
+ COMPOSE_PROJECT_NAME=$(COMPOSE_PROJECT_NAME_TEST_UNIT) $(DOCKER_COMPOSE)
+
+clean:
+ -docker rmi ci/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER)
+ -docker rmi us-east1-docker.pkg.dev/overleaf-ops/ol-docker/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER)
+ -$(DOCKER_COMPOSE_TEST_UNIT) down --rmi local
+ -$(DOCKER_COMPOSE_TEST_ACCEPTANCE) down --rmi local
+
+HERE=$(shell pwd)
+MONOREPO=$(shell cd ../../ && pwd)
+# Run the linting commands in the scope of the monorepo.
+# Eslint and prettier (plus some configs) are on the root.
+RUN_LINTING = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(HERE) node:20.18.2 npm run --silent
+
+RUN_LINTING_CI = docker run --rm --volume $(MONOREPO)/.editorconfig:/overleaf/.editorconfig --volume $(MONOREPO)/.eslintignore:/overleaf/.eslintignore --volume $(MONOREPO)/.eslintrc:/overleaf/.eslintrc --volume $(MONOREPO)/.prettierignore:/overleaf/.prettierignore --volume $(MONOREPO)/.prettierrc:/overleaf/.prettierrc --volume $(MONOREPO)/tsconfig.backend.json:/overleaf/tsconfig.backend.json ci/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER) npm run --silent
+
+# Same but from the top of the monorepo
+RUN_LINTING_MONOREPO = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(MONOREPO) node:20.18.2 npm run --silent
+
+SHELLCHECK_OPTS = \
+ --shell=bash \
+ --external-sources
+SHELLCHECK_COLOR := $(if $(CI),--color=never,--color)
+SHELLCHECK_FILES := { git ls-files "*.sh" -z; git grep -Plz "\A\#\!.*bash"; } | sort -zu
+
+shellcheck:
+ @$(SHELLCHECK_FILES) | xargs -0 -r docker run --rm -v $(HERE):/mnt -w /mnt \
+ koalaman/shellcheck:stable $(SHELLCHECK_OPTS) $(SHELLCHECK_COLOR)
+
+shellcheck_fix:
+ @$(SHELLCHECK_FILES) | while IFS= read -r -d '' file; do \
+ diff=$$(docker run --rm -v $(HERE):/mnt -w /mnt koalaman/shellcheck:stable $(SHELLCHECK_OPTS) --format=diff "$$file" 2>/dev/null); \
+ if [ -n "$$diff" ] && ! echo "$$diff" | patch -p1 >/dev/null 2>&1; then echo "\033[31m$$file\033[0m"; \
+ elif [ -n "$$diff" ]; then echo "$$file"; \
+ else echo "\033[2m$$file\033[0m"; fi \
+ done
+
+format:
+ $(RUN_LINTING) format
+
+format_ci:
+ $(RUN_LINTING_CI) format
+
+format_fix:
+ $(RUN_LINTING) format:fix
+
+lint:
+ $(RUN_LINTING) lint
+
+lint_ci:
+ $(RUN_LINTING_CI) lint
+
+lint_fix:
+ $(RUN_LINTING) lint:fix
+
+typecheck:
+ $(RUN_LINTING) types:check
+
+typecheck_ci:
+ $(RUN_LINTING_CI) types:check
+
+test: format lint typecheck shellcheck test_unit test_acceptance
+
+test_unit:
+ifneq (,$(wildcard test/unit))
+ $(DOCKER_COMPOSE_TEST_UNIT) run --rm test_unit
+ $(MAKE) test_unit_clean
+endif
+
+test_clean: test_unit_clean
+test_unit_clean:
+ifneq (,$(wildcard test/unit))
+ $(DOCKER_COMPOSE_TEST_UNIT) down -v -t 0
+endif
+
+test_acceptance: test_acceptance_clean test_acceptance_pre_run test_acceptance_run
+ $(MAKE) test_acceptance_clean
+
+test_acceptance_debug: test_acceptance_clean test_acceptance_pre_run test_acceptance_run_debug
+ $(MAKE) test_acceptance_clean
+
+test_acceptance_run:
+ifneq (,$(wildcard test/acceptance))
+ $(DOCKER_COMPOSE_TEST_ACCEPTANCE) run --rm test_acceptance
+endif
+
+test_acceptance_run_debug:
+ifneq (,$(wildcard test/acceptance))
+ $(DOCKER_COMPOSE_TEST_ACCEPTANCE) run -p 127.0.0.9:19999:19999 --rm test_acceptance npm run test:acceptance -- --inspect=0.0.0.0:19999 --inspect-brk
+endif
+
+test_clean: test_acceptance_clean
+test_acceptance_clean:
+ $(DOCKER_COMPOSE_TEST_ACCEPTANCE) down -v -t 0
+
+test_acceptance_pre_run:
+ifneq (,$(wildcard test/acceptance/js/scripts/pre-run))
+ $(DOCKER_COMPOSE_TEST_ACCEPTANCE) run --rm test_acceptance test/acceptance/js/scripts/pre-run
+endif
+
+benchmarks:
+ $(DOCKER_COMPOSE_TEST_ACCEPTANCE) run --rm test_acceptance npm run benchmarks
+
+build:
+ docker build \
+ --pull \
+ --build-arg BUILDKIT_INLINE_CACHE=1 \
+ --tag ci/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER) \
+ --tag us-east1-docker.pkg.dev/overleaf-ops/ol-docker/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER) \
+ --tag us-east1-docker.pkg.dev/overleaf-ops/ol-docker/$(PROJECT_NAME):$(BRANCH_NAME) \
+ --cache-from us-east1-docker.pkg.dev/overleaf-ops/ol-docker/$(PROJECT_NAME):$(BRANCH_NAME) \
+ --cache-from us-east1-docker.pkg.dev/overleaf-ops/ol-docker/$(PROJECT_NAME):main \
+ --file Dockerfile \
+ ../..
+
+tar:
+ $(DOCKER_COMPOSE) up tar
+
+publish:
+
+ docker push $(DOCKER_REPO)/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER)
+
+
+.PHONY: clean \
+ format format_fix \
+ lint lint_fix \
+ build_types typecheck \
+ lint_ci format_ci typecheck_ci \
+ shellcheck shellcheck_fix \
+ test test_clean test_unit test_unit_clean \
+ test_acceptance test_acceptance_debug test_acceptance_pre_run \
+ test_acceptance_run test_acceptance_run_debug test_acceptance_clean \
+ benchmarks \
+ build tar publish \
diff --git a/services/references/README.md b/services/references/README.md
new file mode 100644
index 0000000000..41844d259a
--- /dev/null
+++ b/services/references/README.md
@@ -0,0 +1,10 @@
+overleaf/references
+===============
+
+An API for providing citation keys extracted from users' .bib files.
+
+License
+=======
+The code in this repository is released under the GNU AFFERO GENERAL PUBLIC LICENSE, version 3.
+
+Based on https://github.com/overleaf/overleaf/commit/9964aebc794f9fd7ce1373ab3484f6b33b061af3
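The service added below exposes a single indexing endpoint (see app.js and ReferencesAPIController.js later in this diff): `POST /project/:project_id/index` takes a JSON body with `docUrls` (and an optional `fullIndex` flag) and answers with the citation keys found in those files. A minimal sketch of calling it from Node, where the host, project id and document URL are purely illustrative:

    // Illustrative request against the references service (default port 3056 per app.js).
    // The project id and docUrl below are made up for the example.
    const response = await fetch(
      'http://localhost:3056/project/507f1f77bcf86cd799439011/index',
      {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          docUrls: ['http://example.test/project-files/references.bib'],
          fullIndex: true,
        }),
      }
    )
    const { keys } = await response.json()
    console.log(keys) // e.g. ['knuth1984', 'lamport1994'], depending on the .bib contents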
diff --git a/services/references/app.js b/services/references/app.js
new file mode 100644
index 0000000000..a7da8720ed
--- /dev/null
+++ b/services/references/app.js
@@ -0,0 +1,40 @@
+import '@overleaf/metrics/initialize.js'
+
+import express from 'express'
+import Settings from '@overleaf/settings'
+import logger from '@overleaf/logger'
+import metrics from '@overleaf/metrics'
+import ReferencesAPIController from './app/js/ReferencesAPIController.js'
+import bodyParser from 'body-parser'
+
+const app = express()
+metrics.injectMetricsRoute(app)
+
+app.use(bodyParser.json({ limit: '2mb' }))
+app.use(metrics.http.monitor(logger))
+
+app.post('/project/:project_id/index', ReferencesAPIController.index)
+app.get('/status', (req, res) => res.send({ status: 'references api is up' }))
+
+const settings =
+ Settings.internal && Settings.internal.references
+ ? Settings.internal.references
+ : undefined
+const host = settings && settings.host ? settings.host : 'localhost'
+const port = settings && settings.port ? settings.port : 3056
+
+logger.debug({ host, port }, 'Listening at')
+
+const server = app.listen(port, host, function (error) {
+ if (error) {
+ throw error
+ }
+ logger.info({ host, port }, 'references HTTP server starting up')
+})
+
+process.on('SIGTERM', () => {
+ server.close(() => {
+ logger.info({ host, port }, 'references HTTP server closed')
+ metrics.close()
+ })
+})
diff --git a/services/references/app/js/ReferencesAPIController.js b/services/references/app/js/ReferencesAPIController.js
new file mode 100644
index 0000000000..ac51ca6bbd
--- /dev/null
+++ b/services/references/app/js/ReferencesAPIController.js
@@ -0,0 +1,42 @@
+import logger from '@overleaf/logger'
+import BibtexParser from './bib2json.js'
+
+export default {
+ async index(req, res) {
+ const { docUrls, fullIndex } = req.body
+ try {
+ const responses = await Promise.all(
+ docUrls.map(async (docUrl) => {
+ try {
+ const response = await fetch(docUrl)
+ if (!response.ok) {
+ throw new Error(`HTTP error! status: ${response.status}`)
+ }
+ return response.text()
+ } catch (error) {
+ logger.error({ error }, "Failed to fetch document from URL: " + docUrl)
+ return null
+ }
+ })
+ )
+ const keys = []
+ for (const body of responses) {
+ if (!body) continue
+
+ try {
+ const parsedEntries = BibtexParser(body).entries
+ const ks = parsedEntries
+ .filter(entry => entry.EntryKey)
+ .map(entry => entry.EntryKey)
+ keys.push(...ks)
+ } catch (error) {
+ logger.error({ error }, "bib file skipped.")
+ }
+ }
+ res.status(200).json({ keys })
+ } catch (error) {
+ logger.error({ error }, "Unexpected error during indexing process.")
+ res.status(500).json({ error: "Failed to process bib files." })
+ }
+ }
+}
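The controller above relies on the synchronous mode of the vendored parser added next (bib2json.js): calling `BibtexParser(text)` with a string returns `{ entries, errors }`, and each entry carries its citation key in `EntryKey`. A small self-contained example of that call path, with the .bib snippet invented for illustration:

    // Run from services/references/app/js/, mirroring the controller's import.
    import BibtexParser from './bib2json.js'

    // Tiny invented .bib snippet; only the entry key matters to the controller.
    const bib = `
    @article{knuth1984,
      author = {Donald E. Knuth},
      title  = {Literate Programming},
      year   = {1984}
    }
    `

    const { entries, errors } = BibtexParser(bib)
    const keys = entries.filter(e => e.EntryKey).map(e => e.EntryKey)
    console.log(keys)   // ['knuth1984']
    console.log(errors) // [] when the input parses cleanly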
diff --git a/services/references/app/js/bib2json.js b/services/references/app/js/bib2json.js
new file mode 100644
index 0000000000..99cfcf70ee
--- /dev/null
+++ b/services/references/app/js/bib2json.js
@@ -0,0 +1,1967 @@
+/* eslint-disable */
+/**
+ * Parser.js
+ * Copyright 2012-13 Mayank Lahiri
+ * mlahiri@gmail.com
+ * Released under the BSD License.
+ *
+ * Modifications 2016 Sharelatex
+ * Modifications 2017-2020 Overleaf
+ *
+ * A forgiving Bibtex parser that can:
+ *
+ * (1) operate in streaming or block mode, extracting entries as dictionaries.
+ * (2) convert Latex special characters to UTF-8.
+ * (3) best-effort parse malformed entries.
+ * (4) run in a CommonJS environment or a browser, without any dependencies.
+ * (5) be advanced-compiled by Google Closure Compiler.
+ *
+ * Handwritten as a labor of love, not auto-generated from a grammar.
+ *
+ * Modes of usage:
+ *
+ * (1) Synchronous, string
+ *
+ * var entries = BibtexParser(text);
+ * console.log(entries);
+ *
+ * (2) Asynchronous, stream
+ *
+ * function entryCallback(entry) { console.log(entry); }
+ * var parser = new BibtexParser(entryCallback);
+ * parser.parse(chunk1);
+ * parser.parse(chunk2);
+ * ...
+ *
+ * @param {text|function(Object)} arg0 Either a Bibtex string or callback
+ * function for processing parsed entries.
+ * @param {array} allowedKeys optimization: do not output key/value pairs that are not on this allowlist
+ * @constructor
+ */
+function BibtexParser(arg0, allowedKeys) {
+ // Determine how this function is to be used
+ if (typeof arg0 === 'string') {
+ // Passed a string, synchronous call without 'new'
+ const entries = []
+ function accumulator(entry) {
+ entries.push(entry)
+ }
+ const parser = new BibtexParser(accumulator, allowedKeys)
+ parser.parse(arg0)
+ return {
+ entries,
+ errors: parser.getErrors(),
+ }
+ }
+ if (typeof arg0 !== 'function') {
+ throw 'Invalid parser construction.'
+ }
+ this.ALLOWEDKEYS_ = allowedKeys || []
+ this.reset_(arg0)
+ this.initMacros_()
+ return this
+}
+
+/** @enum {number} */
+BibtexParser.prototype.STATES_ = {
+ ENTRY_OR_JUNK: 0,
+ OBJECT_TYPE: 1,
+ ENTRY_KEY: 2,
+ KV_KEY: 3,
+ EQUALS: 4,
+ KV_VALUE: 5,
+}
+BibtexParser.prototype.reset_ = function (arg0) {
+ /** @private */ this.DATA_ = {}
+ /** @private */ this.CALLBACK_ = arg0
+ /** @private */ this.CHAR_ = 0
+ /** @private */ this.LINE_ = 1
+ /** @private */ this.CHAR_IN_LINE_ = 0
+ /** @private */ this.SKIPWS_ = true
+ /** @private */ this.SKIPCOMMENT_ = true
+ /** @private */ this.SKIPKVPAIR_ = false
+ /** @private */ this.PARSETMP_ = {}
+ /** @private */ this.SKIPTILLEOL_ = false
+ /** @private */ this.VALBRACES_ = null
+ /** @private */ this.BRACETYPE_ = null
+ /** @private */ this.BRACECOUNT_ = 0
+ /** @private */ this.STATE_ = this.STATES_.ENTRY_OR_JUNK
+ /** @private */ this.ERRORS_ = []
+}
+/** @private */ BibtexParser.prototype.ENTRY_TYPES_ = {
+ inproceedings: 1,
+ proceedings: 2,
+ article: 3,
+ techreport: 4,
+ misc: 5,
+ mastersthesis: 6,
+ book: 7,
+ phdthesis: 8,
+ incollection: 9,
+ unpublished: 10,
+ inbook: 11,
+ manual: 12,
+ periodical: 13,
+ booklet: 14,
+ masterthesis: 15,
+ conference: 16,
+ /* additional fields from biblatex */
+ artwork: 17,
+ audio: 18,
+ bibnote: 19,
+ bookinbook: 20,
+ collection: 21,
+ commentary: 22,
+ customa: 23,
+ customb: 24,
+ customc: 25,
+ customd: 26,
+ custome: 27,
+ customf: 28,
+ image: 29,
+ inreference: 30,
+ jurisdiction: 31,
+ legal: 32,
+ legislation: 33,
+ letter: 34,
+ movie: 35,
+ music: 36,
+ mvbook: 37,
+ mvcollection: 38,
+ mvproceedings: 39,
+ mvreference: 40,
+ online: 41,
+ patent: 42,
+ performance: 43,
+ reference: 44,
+ report: 45,
+ review: 46,
+ set: 47,
+ software: 48,
+ standard: 49,
+ suppbook: 50,
+ suppcollection: 51,
+ thesis: 52,
+ video: 53,
+}
+BibtexParser.prototype.initMacros_ = function () {
+ // macros can be extended by the user via
+ // @string { macroName = "macroValue" }
+ /** @private */ this.MACROS_ = {
+ jan: 'January',
+ feb: 'February',
+ mar: 'March',
+ apr: 'April',
+ may: 'May',
+ jun: 'June',
+ jul: 'July',
+ aug: 'August',
+ sep: 'September',
+ oct: 'October',
+ nov: 'November',
+ dec: 'December',
+ Jan: 'January',
+ Feb: 'February',
+ Mar: 'March',
+ Apr: 'April',
+ May: 'May',
+ Jun: 'June',
+ Jul: 'July',
+ Aug: 'August',
+ Sep: 'September',
+ Oct: 'October',
+ Nov: 'November',
+ Dec: 'December',
+ }
+}
+
+/**
+ * Gets an array of all errors encountered during parsing.
+ * Array entries are of the format:
+ * [ line number, character in line, character in stream, error text ]
+ *
+ * @returns Array
+ * @public
+ */
+BibtexParser.prototype.getErrors = function () {
+ return this.ERRORS_
+}
+
+/**
+ * Processes a chunk of data
+ * @public
+ */
+BibtexParser.prototype.parse = function (chunk) {
+ for (let i = 0; i < chunk.length; i++) this.processCharacter_(chunk[i])
+}
+
+/**
+ * Logs error at current stream position.
+ *
+ * @private
+ */
+BibtexParser.prototype.error_ = function (text) {
+ this.ERRORS_.push([this.LINE_, this.CHAR_IN_LINE_, this.CHAR_, text])
+}
+
+/**
+ * Called after an entire entry has been parsed from the stream.
+ * Performs post-processing and invokes the entry callback pointed to by
+ * this.CALLBACK_. Parsed (but unprocessed) entry data is in this.DATA_.
+ */
+BibtexParser.prototype.processEntry_ = function () {
+ const data = this.DATA_
+ if (data.Fields)
+ for (const f in data.Fields) {
+ let raw = data.Fields[f]
+
+ // Convert Latex/Bibtex special characters to UTF-8 equivalents
+ for (let i = 0; i < this.CHARCONV_.length; i++) {
+ const re = this.CHARCONV_[i][0]
+ const rep = this.CHARCONV_[i][1]
+ raw = raw.replace(re, rep)
+ }
+
+ // Basic substitutions
+ raw = raw
+ .replace(/[\n\r\t]/g, ' ')
+ .replace(/\s\s+/g, ' ')
+ .replace(/^\s+|\s+$/g, '')
+
+ // Remove braces and backslashes
+ const len = raw.length
+ let processedArr = []
+ for (let i = 0; i < len; i++) {
+ let c = raw[i]
+ let skip = false
+ if (c == '\\' && i < len - 1) c = raw[++i]
+ else {
+ if (c == '{' || c == '}') skip = true
+ }
+ if (!skip) processedArr.push(c)
+ }
+ data.Fields[f] = processedArr.join('')
+ processedArr = null
+ }
+
+ if (data.ObjectType == 'string') {
+ for (const f in data.Fields) {
+ this.MACROS_[f] = data.Fields[f]
+ }
+ } else {
+ // Parsed a new Bibtex entry
+ this.CALLBACK_(data)
+ }
+}
+
+/**
+ * Processes next character in the stream, invoking the callback after
+ * each entry has been found and processed.
+ *
+ * @private
+ * @param {string} c Next character in input stream
+ */
+BibtexParser.prototype.processCharacter_ = function (c) {
+ // Housekeeping
+ this.CHAR_++
+ this.CHAR_IN_LINE_++
+ if (c == '\n') {
+ this.LINE_++
+ this.CHAR_IN_LINE_ = 1
+ }
+
+ // Convenience states for skipping whitespace when needed
+ if (this.SKIPTILLEOL_) {
+ if (c == '\n') this.SKIPTILLEOL_ = false
+ return
+ }
+ if (this.SKIPCOMMENT_ && c == '%') {
+ this.SKIPTILLEOL_ = true
+ return
+ }
+ if (this.SKIPWS_ && /\s/.test(c)) return
+ this.SKIPWS_ = false
+ this.SKIPCOMMENT_ = false
+ this.SKIPTILLEOL_ = false
+
+ // Main state machine
+ let AnotherIteration = true
+ while (AnotherIteration) {
+ // console.log(this.LINE_, this.CHAR_IN_LINE_, this.STATE_, c)
+ AnotherIteration = false
+ switch (this.STATE_) {
+ // -- Scan for an object marker ('@')
+ // -- Reset temporary data structure in case previous entry was garbled
+ case this.STATES_.ENTRY_OR_JUNK:
+ if (c == '@') {
+ // SUCCESS: Parsed a valid start-of-object marker.
+ // NEXT_STATE: OBJECT_TYPE
+ this.STATE_ = this.STATES_.OBJECT_TYPE
+ this.DATA_ = {
+ ObjectType: '',
+ }
+ }
+ this.BRACETYPE_ = null
+ this.SKIPWS_ = true
+ this.SKIPCOMMENT_ = true
+ break
+
+ // Start at first non-whitespace character after start-of-object '@'
+ // -- Accept [A-Za-z], break on non-matching character
+ // -- Populate this.DATA_.EntryType and this.DATA_.ObjectType
+ case this.STATES_.OBJECT_TYPE:
+ if (/[A-Za-z]/.test(c)) {
+ this.DATA_.ObjectType += c.toLowerCase()
+ this.SKIPWS_ = true
+ this.SKIPCOMMENT_ = true
+ } else {
+ // Break from state and validate object type
+ const ot = this.DATA_.ObjectType
+ if (ot == 'comment') {
+ this.STATE_ = this.STATES_.ENTRY_OR_JUNK
+ } else {
+ if (ot == 'string') {
+ this.DATA_.ObjectType = ot
+ this.DATA_.Fields = {}
+ this.BRACETYPE_ = c
+ this.BRACECOUNT_ = 1
+ this.STATE_ = this.STATES_.KV_KEY
+ this.SKIPWS_ = true
+ this.SKIPCOMMENT_ = true
+ this.PARSETMP_ = {
+ Key: '',
+ }
+ } else {
+ if (ot == 'preamble') {
+ this.STATE_ = this.STATES_.ENTRY_OR_JUNK
+ } else {
+ if (ot in this.ENTRY_TYPES_) {
+ // SUCCESS: Parsed a valid object type.
+ // NEXT_STATE: ENTRY_KEY
+ this.DATA_.ObjectType = 'entry'
+ this.DATA_.EntryType = ot
+ this.DATA_.EntryKey = ''
+ this.STATE_ = this.STATES_.ENTRY_KEY
+ AnotherIteration = true
+ } else {
+ // ERROR: Unrecognized object type.
+ // NEXT_STATE: ENTRY_OR_JUNK
+ this.error_(
+ 'Unrecognized object type: "' + this.DATA_.ObjectType + '"'
+ )
+ this.STATE_ = this.STATES_.ENTRY_OR_JUNK
+ }
+ }
+ }
+ }
+ }
+ break
+
+ // Start at first non-alphabetic character after an entry type
+ // -- Populate this.DATA_.EntryKey
+ case this.STATES_.ENTRY_KEY:
+ if ((c === '{' || c === '(') && this.BRACETYPE_ == null) {
+ this.BRACETYPE_ = c
+ this.BRACECOUNT_ = 1
+ this.SKIPWS_ = true
+ this.SKIPCOMMENT_ = true
+ break
+ }
+ if (/[,%\s]/.test(c)) {
+ if (this.DATA_.EntryKey.length < 1) {
+ // Skip comments and whitespace before entry key
+ this.SKIPWS_ = true
+ this.SKIPCOMMENT_ = true
+ } else {
+ if (this.BRACETYPE_ == null) {
+ // ERROR: No opening brace for object
+ // NEXT_STATE: ENTRY_OR_JUNK
+ this.error_('No opening brace for object.')
+ this.STATE_ = this.STATES_.ENTRY_OR_JUNK
+ } else {
+ // SUCCESS: Parsed an entry key
+ // NEXT_STATE: KV_KEY
+ this.SKIPWS_ = true
+ this.SKIPCOMMENT_ = true
+ AnotherIteration = true
+ this.STATE_ = this.STATES_.KV_KEY
+ this.PARSETMP_.Key = ''
+ this.DATA_.Fields = {}
+ }
+ }
+ } else {
+ this.DATA_.EntryKey += c
+ this.SKIPWS_ = false
+ this.SKIPCOMMENT_ = false
+ }
+ break
+
+ // Start at first non-whitespace/comment character after entry key.
+ // -- Populate this.PARSETMP_.Key
+ case this.STATES_.KV_KEY:
+ // Test for end of entry
+ if (
+ (c == '}' && this.BRACETYPE_ == '{') ||
+ (c == ')' && this.BRACETYPE_ == '(')
+ ) {
+          // SUCCESS: Parsed an entry, possibly incomplete
+ // NEXT_STATE: ENTRY_OR_JUNK
+ this.processEntry_()
+ this.SKIPWS_ = true
+ this.SKIPCOMMENT_ = true
+ this.STATE_ = this.STATES_.ENTRY_OR_JUNK
+ break
+ }
+ if (/[\-A-Za-z:]/.test(c)) {
+ // Add to key
+ this.PARSETMP_.Key += c
+ this.SKIPWS_ = false
+ this.SKIPCOMMENT_ = false
+ } else {
+ // Either end of key or we haven't encountered start of key
+ if (this.PARSETMP_.Key.length < 1) {
+ // Keep going till we see a key
+ this.SKIPWS_ = true
+ this.SKIPCOMMENT_ = true
+ } else {
+ // SUCCESS: Found full key in K/V pair
+ // NEXT_STATE: EQUALS
+ this.SKIPWS_ = true
+ this.SKIPCOMMENT_ = true
+ this.STATE_ = this.STATES_.EQUALS
+ AnotherIteration = true
+
+ if (this.DATA_.ObjectType !== 'string') {
+ // this entry is not a macro
+ // normalize the key to lower case
+ this.PARSETMP_.Key = this.PARSETMP_.Key.toLowerCase()
+
+ // optimization: skip key/value pairs that are not on the allowlist
+ this.SKIPKVPAIR_ =
+ // has allowedKeys set
+ this.ALLOWEDKEYS_.length &&
+ // key is not on the allowlist
+ this.ALLOWEDKEYS_.indexOf(this.PARSETMP_.Key) === -1
+ } else {
+ this.SKIPKVPAIR_ = false
+ }
+ }
+ }
+ break
+
+ // Start at first non-alphabetic character after K/V pair key.
+ case this.STATES_.EQUALS:
+ if (
+ (c == '}' && this.BRACETYPE_ == '{') ||
+ (c == ')' && this.BRACETYPE_ == '(')
+ ) {
+ // ERROR: K/V pair with key but no value
+ // NEXT_STATE: ENTRY_OR_JUNK
+ this.error_(
+ 'Key-value pair has key "' + this.PARSETMP_.Key + '", but no value.'
+ )
+ this.processEntry_()
+ this.SKIPWS_ = true
+ this.SKIPCOMMENT_ = true
+ this.STATE_ = this.STATES_.ENTRY_OR_JUNK
+ break
+ }
+ if (c == '=') {
+          // SUCCESS: found an equals sign separating key and value
+ // NEXT_STATE: KV_VALUE
+ this.SKIPWS_ = true
+ this.SKIPCOMMENT_ = true
+ this.STATE_ = this.STATES_.KV_VALUE
+ this.PARSETMP_.Value = []
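+          // VALBRACES_ holds two stacks recording where '"' and '{' value
+          // delimiters were opened; only their depths are consulted below to
+          // track nesting while the value is read.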
+ this.VALBRACES_ = { '"': [], '{': [] }
+ }
+ break
+
+ // Start at first non-whitespace/comment character after '='
+ // -- Populate this.PARSETMP_.Value
+ case this.STATES_.KV_VALUE:
+ const delim = this.VALBRACES_
+ // valueCharsArray is the list of characters that make up the
+ // current value
+ const valueCharsArray = this.PARSETMP_.Value
+ let doneParsingValue = false
+
+ // Test for special characters
+ if (c == '"' || c == '{' || c == '}' || c == ',') {
+ if (c == ',') {
+ // This comma can mean:
+ // (1) just another comma literal
+ // (2) end of a macro reference
+ if (delim['"'].length + delim['{'].length === 0) {
+ // end of a macro reference
+ const macro = this.PARSETMP_.Value.join('').trim()
+ if (macro in this.MACROS_) {
+ // Successful macro reference
+ this.PARSETMP_.Value = [this.MACROS_[macro]]
+ } else {
+ // Reference to an undefined macro
+ this.error_('Reference to an undefined macro: ' + macro)
+ }
+ doneParsingValue = true
+ }
+ }
+ if (c == '"') {
+ // This quote can mean:
+ // (1) opening delimiter
+ // (2) closing delimiter
+ // (3) literal, if we have a '{' on the stack
+ if (delim['"'].length + delim['{'].length === 0) {
+ // opening delimiter
+ delim['"'].push(this.CHAR_)
+ this.SKIPWS_ = false
+ this.SKIPCOMMENT_ = false
+ break
+ }
+ if (
+ delim['"'].length == 1 &&
+ delim['{'].length == 0 &&
+ (valueCharsArray.length == 0 ||
+ valueCharsArray[valueCharsArray.length - 1] != '\\')
+ ) {
+ // closing delimiter
+ doneParsingValue = true
+ } else {
+ // literal, add to value
+ }
+ }
+ if (c == '{') {
+ // This brace can mean:
+ // (1) opening delimiter
+ // (2) stacked verbatim delimiter
+ if (
+ valueCharsArray.length == 0 ||
+ valueCharsArray[valueCharsArray.length - 1] != '\\'
+ ) {
+ delim['{'].push(this.CHAR_)
+ this.SKIPWS_ = false
+ this.SKIPCOMMENT_ = false
+ } else {
+ // literal, add to value
+ }
+ }
+ if (c == '}') {
+ // This brace can mean:
+ // (1) closing delimiter
+ // (2) closing stacked verbatim delimiter
+ // (3) end of object definition if value was a macro
+ if (delim['"'].length + delim['{'].length === 0) {
+ // end of object definition, after macro
+ const macro = this.PARSETMP_.Value.join('').trim()
+ if (macro in this.MACROS_) {
+ // Successful macro reference
+ this.PARSETMP_.Value = [this.MACROS_[macro]]
+ } else {
+ // Reference to an undefined macro
+ this.error_('Reference to an undefined macro: ' + macro)
+ }
+ AnotherIteration = true
+ doneParsingValue = true
+ } else {
+              // Sometimes imported .bib files will have {\}, {\\}, {\\\}, {\\\\}, etc. for whitespace,
+              // which would otherwise break the parsing. We watch for these occurrences of
+              // 1+ backslashes in an empty bracket pair to gracefully handle the malformed bib file.
+ const doubleSlash =
+ valueCharsArray.length >= 2 &&
+ valueCharsArray[valueCharsArray.length - 1] === '\\' && // for \\}
+ valueCharsArray[valueCharsArray.length - 2] === '\\'
+ const singleSlash =
+ valueCharsArray.length >= 2 &&
+ valueCharsArray[valueCharsArray.length - 1] === '\\' && // for {\}
+ valueCharsArray[valueCharsArray.length - 2] === '{'
+
+ if (
+ valueCharsArray.length == 0 ||
+ valueCharsArray[valueCharsArray.length - 1] != '\\' || // for }
+ doubleSlash ||
+ singleSlash
+ ) {
+ if (delim['{'].length > 0) {
+ // pop stack for stacked verbatim delimiter
+ delim['{'].splice(delim['{'].length - 1, 1)
+ if (delim['{'].length + delim['"'].length == 0) {
+ // closing delimiter
+ doneParsingValue = true
+ } else {
+ // end verbatim block
+ }
+ }
+ } else {
+ // literal, add to value
+ }
+ }
+ }
+ }
+
+ // If here, then we are either done parsing the value or
+ // have a literal that should be added to the value.
+ if (doneParsingValue) {
+ // SUCCESS: value parsed
+ // NEXT_STATE: KV_KEY
+ this.SKIPWS_ = true
+ this.SKIPCOMMENT_ = true
+ this.STATE_ = this.STATES_.KV_KEY
+ if (!this.SKIPKVPAIR_) {
+ this.DATA_.Fields[this.PARSETMP_.Key] =
+ this.PARSETMP_.Value.join('')
+ }
+ this.PARSETMP_ = { Key: '' }
+ this.VALBRACES_ = null
+ } else {
+ this.PARSETMP_.Value.push(c)
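+          // Safety guard: if a single value grows past 20,000 characters,
+          // assume the input is malformed, discard the entry and resynchronise
+          // at the next '@' marker.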
+ if (this.PARSETMP_.Value.length >= 1000 * 20) {
+ this.PARSETMP_.Value = []
+ this.STATE_ = this.STATES_.ENTRY_OR_JUNK
+ this.DATA_ = { ObjectType: '' }
+ this.BRACETYPE_ = null
+ this.SKIPWS_ = true
+ this.SKIPCOMMENT_ = true
+ }
+ }
+ break
+ } // end switch (this.STATE_)
+ } // end while(AnotherIteration)
+} // end function processCharacter_
+
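+// Ordered list of [RegExp, replacement] pairs used by processEntry_ above to
+// map LaTeX/BibTeX escape sequences in field values to Unicode characters.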
+/** @private */ BibtexParser.prototype.CHARCONV_ = [
+ [/\\space /g, '\u0020'],
+ [/\\textdollar /g, '\u0024'],
+ [/\\textquotesingle /g, '\u0027'],
+ [/\\ast /g, '\u002A'],
+ [/\\textbackslash /g, '\u005C'],
+ [/\\\^\{\}/g, '\u005E'],
+ [/\\textasciigrave /g, '\u0060'],
+ [/\\lbrace /g, '\u007B'],
+ [/\\vert /g, '\u007C'],
+ [/\\rbrace /g, '\u007D'],
+ [/\\textasciitilde /g, '\u007E'],
+ [/\\textexclamdown /g, '\u00A1'],
+ [/\\textcent /g, '\u00A2'],
+ [/\\textsterling /g, '\u00A3'],
+ [/\\textcurrency /g, '\u00A4'],
+ [/\\textyen /g, '\u00A5'],
+ [/\\textbrokenbar /g, '\u00A6'],
+ [/\\textsection /g, '\u00A7'],
+ [/\\textasciidieresis /g, '\u00A8'],
+ [/\\textcopyright /g, '\u00A9'],
+ [/\\textordfeminine /g, '\u00AA'],
+ [/\\guillemotleft /g, '\u00AB'],
+ [/\\lnot /g, '\u00AC'],
+ [/\\textregistered /g, '\u00AE'],
+ [/\\textasciimacron /g, '\u00AF'],
+ [/\\textdegree /g, '\u00B0'],
+ [/\\pm /g, '\u00B1'],
+ [/\\textasciiacute /g, '\u00B4'],
+ [/\\mathrm\{\\mu\}/g, '\u00B5'],
+ [/\\textparagraph /g, '\u00B6'],
+ [/\\cdot /g, '\u00B7'],
+ [/\\c\{\}/g, '\u00B8'],
+ [/\\textordmasculine /g, '\u00BA'],
+ [/\\guillemotright /g, '\u00BB'],
+ [/\\textonequarter /g, '\u00BC'],
+ [/\\textonehalf /g, '\u00BD'],
+ [/\\textthreequarters /g, '\u00BE'],
+ [/\\textquestiondown /g, '\u00BF'],
+ [/\\`\{A\}/g, '\u00C0'],
+ [/\\'\{A\}/g, '\u00C1'],
+ [/\\\^\{A\}/g, '\u00C2'],
+ [/\\~\{A\}/g, '\u00C3'],
+ [/\\"\{A\}/g, '\u00C4'],
+ [/\\AA /g, '\u00C5'],
+ [/\\AE /g, '\u00C6'],
+ [/\\c\{C\}/g, '\u00C7'],
+ [/\\`\{E\}/g, '\u00C8'],
+ [/\\'\{E\}/g, '\u00C9'],
+ [/\\\^\{E\}/g, '\u00CA'],
+ [/\\"\{E\}/g, '\u00CB'],
+ [/\\`\{I\}/g, '\u00CC'],
+ [/\\'\{I\}/g, '\u00CD'],
+ [/\\\^\{I\}/g, '\u00CE'],
+ [/\\"\{I\}/g, '\u00CF'],
+ [/\\DH /g, '\u00D0'],
+ [/\\~\{N\}/g, '\u00D1'],
+ [/\\`\{O\}/g, '\u00D2'],
+ [/\\'\{O\}/g, '\u00D3'],
+ [/\\\^\{O\}/g, '\u00D4'],
+ [/\\~\{O\}/g, '\u00D5'],
+ [/\\"\{O\}/g, '\u00D6'],
+ [/\\texttimes /g, '\u00D7'],
+ [/\\O /g, '\u00D8'],
+ [/\\`\{U\}/g, '\u00D9'],
+ [/\\'\{U\}/g, '\u00DA'],
+ [/\\\^\{U\}/g, '\u00DB'],
+ [/\\"\{U\}/g, '\u00DC'],
+ [/\\'\{Y\}/g, '\u00DD'],
+ [/\\TH /g, '\u00DE'],
+ [/\\ss /g, '\u00DF'],
+ [/\\`\{a\}/g, '\u00E0'],
+ [/\\'\{a\}/g, '\u00E1'],
+ [/\\\^\{a\}/g, '\u00E2'],
+ [/\\~\{a\}/g, '\u00E3'],
+ [/\\"\{a\}/g, '\u00E4'],
+ [/\\aa /g, '\u00E5'],
+ [/\\ae /g, '\u00E6'],
+ [/\\c\{c\}/g, '\u00E7'],
+ [/\\`\{e\}/g, '\u00E8'],
+ [/\\'\{e\}/g, '\u00E9'],
+ [/\\\^\{e\}/g, '\u00EA'],
+ [/\\"\{e\}/g, '\u00EB'],
+ [/\\`\{\\i\}/g, '\u00EC'],
+ [/\\'\{\\i\}/g, '\u00ED'],
+ [/\\\^\{\\i\}/g, '\u00EE'],
+ [/\\"\{\\i\}/g, '\u00EF'],
+ [/\\dh /g, '\u00F0'],
+ [/\\~\{n\}/g, '\u00F1'],
+ [/\\`\{o\}/g, '\u00F2'],
+ [/\\'\{o\}/g, '\u00F3'],
+ [/\\\^\{o\}/g, '\u00F4'],
+ [/\\~\{o\}/g, '\u00F5'],
+ [/\\"\{o\}/g, '\u00F6'],
+ [/\\div /g, '\u00F7'],
+ [/\\o /g, '\u00F8'],
+ [/\\`\{u\}/g, '\u00F9'],
+ [/\\'\{u\}/g, '\u00FA'],
+ [/\\\^\{u\}/g, '\u00FB'],
+ [/\\"\{u\}/g, '\u00FC'],
+ [/\\'\{y\}/g, '\u00FD'],
+ [/\\th /g, '\u00FE'],
+ [/\\"\{y\}/g, '\u00FF'],
+ [/\\=\{A\}/g, '\u0100'],
+ [/\\=\{a\}/g, '\u0101'],
+ [/\\u\{A\}/g, '\u0102'],
+ [/\\u\{a\}/g, '\u0103'],
+ [/\\k\{A\}/g, '\u0104'],
+ [/\\k\{a\}/g, '\u0105'],
+ [/\\'\{C\}/g, '\u0106'],
+ [/\\'\{c\}/g, '\u0107'],
+ [/\\\^\{C\}/g, '\u0108'],
+ [/\\\^\{c\}/g, '\u0109'],
+ [/\\.\{C\}/g, '\u010A'],
+ [/\\.\{c\}/g, '\u010B'],
+ [/\\v\{C\}/g, '\u010C'],
+ [/\\v\{c\}/g, '\u010D'],
+ [/\\v\{D\}/g, '\u010E'],
+ [/\\v\{d\}/g, '\u010F'],
+ [/\\DJ /g, '\u0110'],
+ [/\\dj /g, '\u0111'],
+ [/\\=\{E\}/g, '\u0112'],
+ [/\\=\{e\}/g, '\u0113'],
+ [/\\u\{E\}/g, '\u0114'],
+ [/\\u\{e\}/g, '\u0115'],
+ [/\\.\{E\}/g, '\u0116'],
+ [/\\.\{e\}/g, '\u0117'],
+ [/\\k\{E\}/g, '\u0118'],
+ [/\\k\{e\}/g, '\u0119'],
+ [/\\v\{E\}/g, '\u011A'],
+ [/\\v\{e\}/g, '\u011B'],
+ [/\\\^\{G\}/g, '\u011C'],
+ [/\\\^\{g\}/g, '\u011D'],
+ [/\\u\{G\}/g, '\u011E'],
+ [/\\u\{g\}/g, '\u011F'],
+ [/\\.\{G\}/g, '\u0120'],
+ [/\\.\{g\}/g, '\u0121'],
+ [/\\c\{G\}/g, '\u0122'],
+ [/\\c\{g\}/g, '\u0123'],
+ [/\\\^\{H\}/g, '\u0124'],
+ [/\\\^\{h\}/g, '\u0125'],
+ [/\\Elzxh /g, '\u0127'],
+ [/\\~\{I\}/g, '\u0128'],
+ [/\\~\{\\i\}/g, '\u0129'],
+ [/\\=\{I\}/g, '\u012A'],
+ [/\\=\{\\i\}/g, '\u012B'],
+ [/\\u\{I\}/g, '\u012C'],
+ [/\\u\{\\i\}/g, '\u012D'],
+ [/\\k\{I\}/g, '\u012E'],
+ [/\\k\{i\}/g, '\u012F'],
+ [/\\.\{I\}/g, '\u0130'],
+ [/\\i /g, '\u0131'],
+ [/\\\^\{J\}/g, '\u0134'],
+ [/\\\^\{\\j\}/g, '\u0135'],
+ [/\\c\{K\}/g, '\u0136'],
+ [/\\c\{k\}/g, '\u0137'],
+ [/\\'\{L\}/g, '\u0139'],
+ [/\\'\{l\}/g, '\u013A'],
+ [/\\c\{L\}/g, '\u013B'],
+ [/\\c\{l\}/g, '\u013C'],
+ [/\\v\{L\}/g, '\u013D'],
+ [/\\v\{l\}/g, '\u013E'],
+ [/\\L /g, '\u0141'],
+ [/\\l /g, '\u0142'],
+ [/\\'\{N\}/g, '\u0143'],
+ [/\\'\{n\}/g, '\u0144'],
+ [/\\c\{N\}/g, '\u0145'],
+ [/\\c\{n\}/g, '\u0146'],
+ [/\\v\{N\}/g, '\u0147'],
+ [/\\v\{n\}/g, '\u0148'],
+ [/\\NG /g, '\u014A'],
+ [/\\ng /g, '\u014B'],
+ [/\\=\{O\}/g, '\u014C'],
+ [/\\=\{o\}/g, '\u014D'],
+ [/\\u\{O\}/g, '\u014E'],
+ [/\\u\{o\}/g, '\u014F'],
+ [/\\H\{O\}/g, '\u0150'],
+ [/\\H\{o\}/g, '\u0151'],
+ [/\\OE /g, '\u0152'],
+ [/\\oe /g, '\u0153'],
+ [/\\'\{R\}/g, '\u0154'],
+ [/\\'\{r\}/g, '\u0155'],
+ [/\\c\{R\}/g, '\u0156'],
+ [/\\c\{r\}/g, '\u0157'],
+ [/\\v\{R\}/g, '\u0158'],
+ [/\\v\{r\}/g, '\u0159'],
+ [/\\'\{S\}/g, '\u015A'],
+ [/\\'\{s\}/g, '\u015B'],
+ [/\\\^\{S\}/g, '\u015C'],
+ [/\\\^\{s\}/g, '\u015D'],
+ [/\\c\{S\}/g, '\u015E'],
+ [/\\c\{s\}/g, '\u015F'],
+ [/\\v\{S\}/g, '\u0160'],
+ [/\\v\{s\}/g, '\u0161'],
+ [/\\c\{T\}/g, '\u0162'],
+ [/\\c\{t\}/g, '\u0163'],
+ [/\\v\{T\}/g, '\u0164'],
+ [/\\v\{t\}/g, '\u0165'],
+ [/\\~\{U\}/g, '\u0168'],
+ [/\\~\{u\}/g, '\u0169'],
+ [/\\=\{U\}/g, '\u016A'],
+ [/\\=\{u\}/g, '\u016B'],
+ [/\\u\{U\}/g, '\u016C'],
+ [/\\u\{u\}/g, '\u016D'],
+ [/\\r\{U\}/g, '\u016E'],
+ [/\\r\{u\}/g, '\u016F'],
+ [/\\H\{U\}/g, '\u0170'],
+ [/\\H\{u\}/g, '\u0171'],
+ [/\\k\{U\}/g, '\u0172'],
+ [/\\k\{u\}/g, '\u0173'],
+ [/\\\^\{W\}/g, '\u0174'],
+ [/\\\^\{w\}/g, '\u0175'],
+ [/\\\^\{Y\}/g, '\u0176'],
+ [/\\\^\{y\}/g, '\u0177'],
+ [/\\"\{Y\}/g, '\u0178'],
+ [/\\'\{Z\}/g, '\u0179'],
+ [/\\'\{z\}/g, '\u017A'],
+ [/\\.\{Z\}/g, '\u017B'],
+ [/\\.\{z\}/g, '\u017C'],
+ [/\\v\{Z\}/g, '\u017D'],
+ [/\\v\{z\}/g, '\u017E'],
+ [/\\texthvlig /g, '\u0195'],
+ [/\\textnrleg /g, '\u019E'],
+ [/\\eth /g, '\u01AA'],
+ [/\\textdoublepipe /g, '\u01C2'],
+ [/\\'\{g\}/g, '\u01F5'],
+ [/\\Elztrna /g, '\u0250'],
+ [/\\Elztrnsa /g, '\u0252'],
+ [/\\Elzopeno /g, '\u0254'],
+ [/\\Elzrtld /g, '\u0256'],
+ [/\\Elzschwa /g, '\u0259'],
+ [/\\varepsilon /g, '\u025B'],
+ [/\\Elzpgamma /g, '\u0263'],
+ [/\\Elzpbgam /g, '\u0264'],
+ [/\\Elztrnh /g, '\u0265'],
+ [/\\Elzbtdl /g, '\u026C'],
+ [/\\Elzrtll /g, '\u026D'],
+ [/\\Elztrnm /g, '\u026F'],
+ [/\\Elztrnmlr /g, '\u0270'],
+ [/\\Elzltlmr /g, '\u0271'],
+ [/\\Elzltln /g, '\u0272'],
+ [/\\Elzrtln /g, '\u0273'],
+ [/\\Elzclomeg /g, '\u0277'],
+ [/\\textphi /g, '\u0278'],
+ [/\\Elztrnr /g, '\u0279'],
+ [/\\Elztrnrl /g, '\u027A'],
+ [/\\Elzrttrnr /g, '\u027B'],
+ [/\\Elzrl /g, '\u027C'],
+ [/\\Elzrtlr /g, '\u027D'],
+ [/\\Elzfhr /g, '\u027E'],
+ [/\\Elzrtls /g, '\u0282'],
+ [/\\Elzesh /g, '\u0283'],
+ [/\\Elztrnt /g, '\u0287'],
+ [/\\Elzrtlt /g, '\u0288'],
+ [/\\Elzpupsil /g, '\u028A'],
+ [/\\Elzpscrv /g, '\u028B'],
+ [/\\Elzinvv /g, '\u028C'],
+ [/\\Elzinvw /g, '\u028D'],
+ [/\\Elztrny /g, '\u028E'],
+ [/\\Elzrtlz /g, '\u0290'],
+ [/\\Elzyogh /g, '\u0292'],
+ [/\\Elzglst /g, '\u0294'],
+ [/\\Elzreglst /g, '\u0295'],
+ [/\\Elzinglst /g, '\u0296'],
+ [/\\textturnk /g, '\u029E'],
+ [/\\Elzdyogh /g, '\u02A4'],
+ [/\\Elztesh /g, '\u02A7'],
+ [/\\textasciicaron /g, '\u02C7'],
+ [/\\Elzverts /g, '\u02C8'],
+ [/\\Elzverti /g, '\u02CC'],
+ [/\\Elzlmrk /g, '\u02D0'],
+ [/\\Elzhlmrk /g, '\u02D1'],
+ [/\\Elzsbrhr /g, '\u02D2'],
+ [/\\Elzsblhr /g, '\u02D3'],
+ [/\\Elzrais /g, '\u02D4'],
+ [/\\Elzlow /g, '\u02D5'],
+ [/\\textasciibreve /g, '\u02D8'],
+ [/\\textperiodcentered /g, '\u02D9'],
+ [/\\r\{\}/g, '\u02DA'],
+ [/\\k\{\}/g, '\u02DB'],
+ [/\\texttildelow /g, '\u02DC'],
+ [/\\H\{\}/g, '\u02DD'],
+ [/\\tone\{55\}/g, '\u02E5'],
+ [/\\tone\{44\}/g, '\u02E6'],
+ [/\\tone\{33\}/g, '\u02E7'],
+ [/\\tone\{22\}/g, '\u02E8'],
+ [/\\tone\{11\}/g, '\u02E9'],
+ [/\\cyrchar\\C/g, '\u030F'],
+ [/\\Elzpalh /g, '\u0321'],
+ [/\\Elzrh /g, '\u0322'],
+ [/\\Elzsbbrg /g, '\u032A'],
+ [/\\Elzxl /g, '\u0335'],
+ [/\\Elzbar /g, '\u0336'],
+ [/\\'\{A\}/g, '\u0386'],
+ [/\\'\{E\}/g, '\u0388'],
+ [/\\'\{H\}/g, '\u0389'],
+ [/\\'\{\}\{I\}/g, '\u038A'],
+ [/\\'\{\}O/g, '\u038C'],
+ [/\\mathrm\{'Y\}/g, '\u038E'],
+ [/\\mathrm\{'\\Omega\}/g, '\u038F'],
+ [/\\acute\{\\ddot\{\\iota\}\}/g, '\u0390'],
+ [/\\Alpha /g, '\u0391'],
+ [/\\Beta /g, '\u0392'],
+ [/\\Gamma /g, '\u0393'],
+ [/\\Delta /g, '\u0394'],
+ [/\\Epsilon /g, '\u0395'],
+ [/\\Zeta /g, '\u0396'],
+ [/\\Eta /g, '\u0397'],
+ [/\\Theta /g, '\u0398'],
+ [/\\Iota /g, '\u0399'],
+ [/\\Kappa /g, '\u039A'],
+ [/\\Lambda /g, '\u039B'],
+ [/\\Xi /g, '\u039E'],
+ [/\\Pi /g, '\u03A0'],
+ [/\\Rho /g, '\u03A1'],
+ [/\\Sigma /g, '\u03A3'],
+ [/\\Tau /g, '\u03A4'],
+ [/\\Upsilon /g, '\u03A5'],
+ [/\\Phi /g, '\u03A6'],
+ [/\\Chi /g, '\u03A7'],
+ [/\\Psi /g, '\u03A8'],
+ [/\\Omega /g, '\u03A9'],
+ [/\\mathrm\{\\ddot\{I\}\}/g, '\u03AA'],
+ [/\\mathrm\{\\ddot\{Y\}\}/g, '\u03AB'],
+ [/\\'\{\$\\alpha\$\}/g, '\u03AC'],
+ [/\\acute\{\\epsilon\}/g, '\u03AD'],
+ [/\\acute\{\\eta\}/g, '\u03AE'],
+ [/\\acute\{\\iota\}/g, '\u03AF'],
+ [/\\acute\{\\ddot\{\\upsilon\}\}/g, '\u03B0'],
+ [/\\alpha /g, '\u03B1'],
+ [/\\beta /g, '\u03B2'],
+ [/\\gamma /g, '\u03B3'],
+ [/\\delta /g, '\u03B4'],
+ [/\\epsilon /g, '\u03B5'],
+ [/\\zeta /g, '\u03B6'],
+ [/\\eta /g, '\u03B7'],
+ [/\\texttheta /g, '\u03B8'],
+ [/\\iota /g, '\u03B9'],
+ [/\\kappa /g, '\u03BA'],
+ [/\\lambda /g, '\u03BB'],
+ [/\\mu /g, '\u03BC'],
+ [/\\nu /g, '\u03BD'],
+ [/\\xi /g, '\u03BE'],
+ [/\\pi /g, '\u03C0'],
+ [/\\rho /g, '\u03C1'],
+ [/\\varsigma /g, '\u03C2'],
+ [/\\sigma /g, '\u03C3'],
+ [/\\tau /g, '\u03C4'],
+ [/\\upsilon /g, '\u03C5'],
+ [/\\varphi /g, '\u03C6'],
+ [/\\chi /g, '\u03C7'],
+ [/\\psi /g, '\u03C8'],
+ [/\\omega /g, '\u03C9'],
+ [/\\ddot\{\\iota\}/g, '\u03CA'],
+ [/\\ddot\{\\upsilon\}/g, '\u03CB'],
+ [/\\'\{o\}/g, '\u03CC'],
+ [/\\acute\{\\upsilon\}/g, '\u03CD'],
+ [/\\acute\{\\omega\}/g, '\u03CE'],
+ [/\\Pisymbol\{ppi022\}\{87\}/g, '\u03D0'],
+ [/\\textvartheta /g, '\u03D1'],
+ [/\\Upsilon /g, '\u03D2'],
+ [/\\phi /g, '\u03D5'],
+ [/\\varpi /g, '\u03D6'],
+ [/\\Stigma /g, '\u03DA'],
+ [/\\Digamma /g, '\u03DC'],
+ [/\\digamma /g, '\u03DD'],
+ [/\\Koppa /g, '\u03DE'],
+ [/\\Sampi /g, '\u03E0'],
+ [/\\varkappa /g, '\u03F0'],
+ [/\\varrho /g, '\u03F1'],
+ [/\\textTheta /g, '\u03F4'],
+ [/\\backepsilon /g, '\u03F6'],
+ [/\\cyrchar\\CYRYO /g, '\u0401'],
+ [/\\cyrchar\\CYRDJE /g, '\u0402'],
+ [/\\cyrchar\{\\'\\CYRG\}/g, '\u0403'],
+ [/\\cyrchar\\CYRIE /g, '\u0404'],
+ [/\\cyrchar\\CYRDZE /g, '\u0405'],
+ [/\\cyrchar\\CYRII /g, '\u0406'],
+ [/\\cyrchar\\CYRYI /g, '\u0407'],
+ [/\\cyrchar\\CYRJE /g, '\u0408'],
+ [/\\cyrchar\\CYRLJE /g, '\u0409'],
+ [/\\cyrchar\\CYRNJE /g, '\u040A'],
+ [/\\cyrchar\\CYRTSHE /g, '\u040B'],
+ [/\\cyrchar\{\\'\\CYRK\}/g, '\u040C'],
+ [/\\cyrchar\\CYRUSHRT /g, '\u040E'],
+ [/\\cyrchar\\CYRDZHE /g, '\u040F'],
+ [/\\cyrchar\\CYRA /g, '\u0410'],
+ [/\\cyrchar\\CYRB /g, '\u0411'],
+ [/\\cyrchar\\CYRV /g, '\u0412'],
+ [/\\cyrchar\\CYRG /g, '\u0413'],
+ [/\\cyrchar\\CYRD /g, '\u0414'],
+ [/\\cyrchar\\CYRE /g, '\u0415'],
+ [/\\cyrchar\\CYRZH /g, '\u0416'],
+ [/\\cyrchar\\CYRZ /g, '\u0417'],
+ [/\\cyrchar\\CYRI /g, '\u0418'],
+ [/\\cyrchar\\CYRISHRT /g, '\u0419'],
+ [/\\cyrchar\\CYRK /g, '\u041A'],
+ [/\\cyrchar\\CYRL /g, '\u041B'],
+ [/\\cyrchar\\CYRM /g, '\u041C'],
+ [/\\cyrchar\\CYRN /g, '\u041D'],
+ [/\\cyrchar\\CYRO /g, '\u041E'],
+ [/\\cyrchar\\CYRP /g, '\u041F'],
+ [/\\cyrchar\\CYRR /g, '\u0420'],
+ [/\\cyrchar\\CYRS /g, '\u0421'],
+ [/\\cyrchar\\CYRT /g, '\u0422'],
+ [/\\cyrchar\\CYRU /g, '\u0423'],
+ [/\\cyrchar\\CYRF /g, '\u0424'],
+ [/\\cyrchar\\CYRH /g, '\u0425'],
+ [/\\cyrchar\\CYRC /g, '\u0426'],
+ [/\\cyrchar\\CYRCH /g, '\u0427'],
+ [/\\cyrchar\\CYRSH /g, '\u0428'],
+ [/\\cyrchar\\CYRSHCH /g, '\u0429'],
+ [/\\cyrchar\\CYRHRDSN /g, '\u042A'],
+ [/\\cyrchar\\CYRERY /g, '\u042B'],
+ [/\\cyrchar\\CYRSFTSN /g, '\u042C'],
+ [/\\cyrchar\\CYREREV /g, '\u042D'],
+ [/\\cyrchar\\CYRYU /g, '\u042E'],
+ [/\\cyrchar\\CYRYA /g, '\u042F'],
+ [/\\cyrchar\\cyra /g, '\u0430'],
+ [/\\cyrchar\\cyrb /g, '\u0431'],
+ [/\\cyrchar\\cyrv /g, '\u0432'],
+ [/\\cyrchar\\cyrg /g, '\u0433'],
+ [/\\cyrchar\\cyrd /g, '\u0434'],
+ [/\\cyrchar\\cyre /g, '\u0435'],
+ [/\\cyrchar\\cyrzh /g, '\u0436'],
+ [/\\cyrchar\\cyrz /g, '\u0437'],
+ [/\\cyrchar\\cyri /g, '\u0438'],
+ [/\\cyrchar\\cyrishrt /g, '\u0439'],
+ [/\\cyrchar\\cyrk /g, '\u043A'],
+ [/\\cyrchar\\cyrl /g, '\u043B'],
+ [/\\cyrchar\\cyrm /g, '\u043C'],
+ [/\\cyrchar\\cyrn /g, '\u043D'],
+ [/\\cyrchar\\cyro /g, '\u043E'],
+ [/\\cyrchar\\cyrp /g, '\u043F'],
+ [/\\cyrchar\\cyrr /g, '\u0440'],
+ [/\\cyrchar\\cyrs /g, '\u0441'],
+ [/\\cyrchar\\cyrt /g, '\u0442'],
+ [/\\cyrchar\\cyru /g, '\u0443'],
+ [/\\cyrchar\\cyrf /g, '\u0444'],
+ [/\\cyrchar\\cyrh /g, '\u0445'],
+ [/\\cyrchar\\cyrc /g, '\u0446'],
+ [/\\cyrchar\\cyrch /g, '\u0447'],
+ [/\\cyrchar\\cyrsh /g, '\u0448'],
+ [/\\cyrchar\\cyrshch /g, '\u0449'],
+ [/\\cyrchar\\cyrhrdsn /g, '\u044A'],
+ [/\\cyrchar\\cyrery /g, '\u044B'],
+ [/\\cyrchar\\cyrsftsn /g, '\u044C'],
+ [/\\cyrchar\\cyrerev /g, '\u044D'],
+ [/\\cyrchar\\cyryu /g, '\u044E'],
+ [/\\cyrchar\\cyrya /g, '\u044F'],
+ [/\\cyrchar\\cyryo /g, '\u0451'],
+ [/\\cyrchar\\cyrdje /g, '\u0452'],
+ [/\\cyrchar\{\\'\\cyrg\}/g, '\u0453'],
+ [/\\cyrchar\\cyrie /g, '\u0454'],
+ [/\\cyrchar\\cyrdze /g, '\u0455'],
+ [/\\cyrchar\\cyrii /g, '\u0456'],
+ [/\\cyrchar\\cyryi /g, '\u0457'],
+ [/\\cyrchar\\cyrje /g, '\u0458'],
+ [/\\cyrchar\\cyrlje /g, '\u0459'],
+ [/\\cyrchar\\cyrnje /g, '\u045A'],
+ [/\\cyrchar\\cyrtshe /g, '\u045B'],
+ [/\\cyrchar\{\\'\\cyrk\}/g, '\u045C'],
+ [/\\cyrchar\\cyrushrt /g, '\u045E'],
+ [/\\cyrchar\\cyrdzhe /g, '\u045F'],
+ [/\\cyrchar\\CYROMEGA /g, '\u0460'],
+ [/\\cyrchar\\cyromega /g, '\u0461'],
+ [/\\cyrchar\\CYRYAT /g, '\u0462'],
+ [/\\cyrchar\\CYRIOTE /g, '\u0464'],
+ [/\\cyrchar\\cyriote /g, '\u0465'],
+ [/\\cyrchar\\CYRLYUS /g, '\u0466'],
+ [/\\cyrchar\\cyrlyus /g, '\u0467'],
+ [/\\cyrchar\\CYRIOTLYUS /g, '\u0468'],
+ [/\\cyrchar\\cyriotlyus /g, '\u0469'],
+ [/\\cyrchar\\CYRBYUS /g, '\u046A'],
+ [/\\cyrchar\\CYRIOTBYUS /g, '\u046C'],
+ [/\\cyrchar\\cyriotbyus /g, '\u046D'],
+ [/\\cyrchar\\CYRKSI /g, '\u046E'],
+ [/\\cyrchar\\cyrksi /g, '\u046F'],
+ [/\\cyrchar\\CYRPSI /g, '\u0470'],
+ [/\\cyrchar\\cyrpsi /g, '\u0471'],
+ [/\\cyrchar\\CYRFITA /g, '\u0472'],
+ [/\\cyrchar\\CYRIZH /g, '\u0474'],
+ [/\\cyrchar\\CYRUK /g, '\u0478'],
+ [/\\cyrchar\\cyruk /g, '\u0479'],
+ [/\\cyrchar\\CYROMEGARND /g, '\u047A'],
+ [/\\cyrchar\\cyromegarnd /g, '\u047B'],
+ [/\\cyrchar\\CYROMEGATITLO /g, '\u047C'],
+ [/\\cyrchar\\cyromegatitlo /g, '\u047D'],
+ [/\\cyrchar\\CYROT /g, '\u047E'],
+ [/\\cyrchar\\cyrot /g, '\u047F'],
+ [/\\cyrchar\\CYRKOPPA /g, '\u0480'],
+ [/\\cyrchar\\cyrkoppa /g, '\u0481'],
+ [/\\cyrchar\\cyrthousands /g, '\u0482'],
+ [/\\cyrchar\\cyrhundredthousands /g, '\u0488'],
+ [/\\cyrchar\\cyrmillions /g, '\u0489'],
+ [/\\cyrchar\\CYRSEMISFTSN /g, '\u048C'],
+ [/\\cyrchar\\cyrsemisftsn /g, '\u048D'],
+ [/\\cyrchar\\CYRRTICK /g, '\u048E'],
+ [/\\cyrchar\\cyrrtick /g, '\u048F'],
+ [/\\cyrchar\\CYRGUP /g, '\u0490'],
+ [/\\cyrchar\\cyrgup /g, '\u0491'],
+ [/\\cyrchar\\CYRGHCRS /g, '\u0492'],
+ [/\\cyrchar\\cyrghcrs /g, '\u0493'],
+ [/\\cyrchar\\CYRGHK /g, '\u0494'],
+ [/\\cyrchar\\cyrghk /g, '\u0495'],
+ [/\\cyrchar\\CYRZHDSC /g, '\u0496'],
+ [/\\cyrchar\\cyrzhdsc /g, '\u0497'],
+ [/\\cyrchar\\CYRZDSC /g, '\u0498'],
+ [/\\cyrchar\\cyrzdsc /g, '\u0499'],
+ [/\\cyrchar\\CYRKDSC /g, '\u049A'],
+ [/\\cyrchar\\cyrkdsc /g, '\u049B'],
+ [/\\cyrchar\\CYRKVCRS /g, '\u049C'],
+ [/\\cyrchar\\cyrkvcrs /g, '\u049D'],
+ [/\\cyrchar\\CYRKHCRS /g, '\u049E'],
+ [/\\cyrchar\\cyrkhcrs /g, '\u049F'],
+ [/\\cyrchar\\CYRKBEAK /g, '\u04A0'],
+ [/\\cyrchar\\cyrkbeak /g, '\u04A1'],
+ [/\\cyrchar\\CYRNDSC /g, '\u04A2'],
+ [/\\cyrchar\\cyrndsc /g, '\u04A3'],
+ [/\\cyrchar\\CYRNG /g, '\u04A4'],
+ [/\\cyrchar\\cyrng /g, '\u04A5'],
+ [/\\cyrchar\\CYRPHK /g, '\u04A6'],
+ [/\\cyrchar\\cyrphk /g, '\u04A7'],
+ [/\\cyrchar\\CYRABHHA /g, '\u04A8'],
+ [/\\cyrchar\\cyrabhha /g, '\u04A9'],
+ [/\\cyrchar\\CYRSDSC /g, '\u04AA'],
+ [/\\cyrchar\\cyrsdsc /g, '\u04AB'],
+ [/\\cyrchar\\CYRTDSC /g, '\u04AC'],
+ [/\\cyrchar\\cyrtdsc /g, '\u04AD'],
+ [/\\cyrchar\\CYRY /g, '\u04AE'],
+ [/\\cyrchar\\cyry /g, '\u04AF'],
+ [/\\cyrchar\\CYRYHCRS /g, '\u04B0'],
+ [/\\cyrchar\\cyryhcrs /g, '\u04B1'],
+ [/\\cyrchar\\CYRHDSC /g, '\u04B2'],
+ [/\\cyrchar\\cyrhdsc /g, '\u04B3'],
+ [/\\cyrchar\\CYRTETSE /g, '\u04B4'],
+ [/\\cyrchar\\cyrtetse /g, '\u04B5'],
+ [/\\cyrchar\\CYRCHRDSC /g, '\u04B6'],
+ [/\\cyrchar\\cyrchrdsc /g, '\u04B7'],
+ [/\\cyrchar\\CYRCHVCRS /g, '\u04B8'],
+ [/\\cyrchar\\cyrchvcrs /g, '\u04B9'],
+ [/\\cyrchar\\CYRSHHA /g, '\u04BA'],
+ [/\\cyrchar\\cyrshha /g, '\u04BB'],
+ [/\\cyrchar\\CYRABHCH /g, '\u04BC'],
+ [/\\cyrchar\\cyrabhch /g, '\u04BD'],
+ [/\\cyrchar\\CYRABHCHDSC /g, '\u04BE'],
+ [/\\cyrchar\\cyrabhchdsc /g, '\u04BF'],
+ [/\\cyrchar\\CYRpalochka /g, '\u04C0'],
+ [/\\cyrchar\\CYRKHK /g, '\u04C3'],
+ [/\\cyrchar\\cyrkhk /g, '\u04C4'],
+ [/\\cyrchar\\CYRNHK /g, '\u04C7'],
+ [/\\cyrchar\\cyrnhk /g, '\u04C8'],
+ [/\\cyrchar\\CYRCHLDSC /g, '\u04CB'],
+ [/\\cyrchar\\cyrchldsc /g, '\u04CC'],
+ [/\\cyrchar\\CYRAE /g, '\u04D4'],
+ [/\\cyrchar\\cyrae /g, '\u04D5'],
+ [/\\cyrchar\\CYRSCHWA /g, '\u04D8'],
+ [/\\cyrchar\\cyrschwa /g, '\u04D9'],
+ [/\\cyrchar\\CYRABHDZE /g, '\u04E0'],
+ [/\\cyrchar\\cyrabhdze /g, '\u04E1'],
+ [/\\cyrchar\\CYROTLD /g, '\u04E8'],
+ [/\\cyrchar\\cyrotld /g, '\u04E9'],
+ [/\\hspace\{0.6em\}/g, '\u2002'],
+ [/\\hspace\{1em\}/g, '\u2003'],
+ [/\\hspace\{0.33em\}/g, '\u2004'],
+ [/\\hspace\{0.25em\}/g, '\u2005'],
+ [/\\hspace\{0.166em\}/g, '\u2006'],
+ [/\\hphantom\{0\}/g, '\u2007'],
+ [/\\hphantom\{,\}/g, '\u2008'],
+ [/\\hspace\{0.167em\}/g, '\u2009'],
+ [/\\mkern1mu /g, '\u200A'],
+ [/\\textendash /g, '\u2013'],
+ [/\\textemdash /g, '\u2014'],
+ [/\\rule\{1em\}\{1pt\}/g, '\u2015'],
+ [/\\Vert /g, '\u2016'],
+ [/\\Elzreapos /g, '\u201B'],
+ [/\\textquotedblleft /g, '\u201C'],
+ [/\\textquotedblright /g, '\u201D'],
+ [/\\textdagger /g, '\u2020'],
+ [/\\textdaggerdbl /g, '\u2021'],
+ [/\\textbullet /g, '\u2022'],
+ [/\\ldots /g, '\u2026'],
+ [/\\textperthousand /g, '\u2030'],
+ [/\\textpertenthousand /g, '\u2031'],
+ [/\\backprime /g, '\u2035'],
+ [/\\guilsinglleft /g, '\u2039'],
+ [/\\guilsinglright /g, '\u203A'],
+ [/\\mkern4mu /g, '\u205F'],
+ [/\\nolinebreak /g, '\u2060'],
+ [/\\ensuremath\{\\Elzpes\}/g, '\u20A7'],
+ [/\\mbox\{\\texteuro\} /g, '\u20AC'],
+ [/\\dddot /g, '\u20DB'],
+ [/\\ddddot /g, '\u20DC'],
+ [/\\mathbb\{C\}/g, '\u2102'],
+ [/\\mathscr\{g\}/g, '\u210A'],
+ [/\\mathscr\{H\}/g, '\u210B'],
+ [/\\mathfrak\{H\}/g, '\u210C'],
+ [/\\mathbb\{H\}/g, '\u210D'],
+ [/\\hslash /g, '\u210F'],
+ [/\\mathscr\{I\}/g, '\u2110'],
+ [/\\mathfrak\{I\}/g, '\u2111'],
+ [/\\mathscr\{L\}/g, '\u2112'],
+ [/\\mathscr\{l\}/g, '\u2113'],
+ [/\\mathbb\{N\}/g, '\u2115'],
+ [/\\cyrchar\\textnumero /g, '\u2116'],
+ [/\\wp /g, '\u2118'],
+ [/\\mathbb\{P\}/g, '\u2119'],
+ [/\\mathbb\{Q\}/g, '\u211A'],
+ [/\\mathscr\{R\}/g, '\u211B'],
+ [/\\mathfrak\{R\}/g, '\u211C'],
+ [/\\mathbb\{R\}/g, '\u211D'],
+ [/\\Elzxrat /g, '\u211E'],
+ [/\\texttrademark /g, '\u2122'],
+ [/\\mathbb\{Z\}/g, '\u2124'],
+ [/\\Omega /g, '\u2126'],
+ [/\\mho /g, '\u2127'],
+ [/\\mathfrak\{Z\}/g, '\u2128'],
+ [/\\ElsevierGlyph\{2129\}/g, '\u2129'],
+ [/\\AA /g, '\u212B'],
+ [/\\mathscr\{B\}/g, '\u212C'],
+ [/\\mathfrak\{C\}/g, '\u212D'],
+ [/\\mathscr\{e\}/g, '\u212F'],
+ [/\\mathscr\{E\}/g, '\u2130'],
+ [/\\mathscr\{F\}/g, '\u2131'],
+ [/\\mathscr\{M\}/g, '\u2133'],
+ [/\\mathscr\{o\}/g, '\u2134'],
+ [/\\aleph /g, '\u2135'],
+ [/\\beth /g, '\u2136'],
+ [/\\gimel /g, '\u2137'],
+ [/\\daleth /g, '\u2138'],
+ [/\\textfrac\{1\}\{3\}/g, '\u2153'],
+ [/\\textfrac\{2\}\{3\}/g, '\u2154'],
+ [/\\textfrac\{1\}\{5\}/g, '\u2155'],
+ [/\\textfrac\{2\}\{5\}/g, '\u2156'],
+ [/\\textfrac\{3\}\{5\}/g, '\u2157'],
+ [/\\textfrac\{4\}\{5\}/g, '\u2158'],
+ [/\\textfrac\{1\}\{6\}/g, '\u2159'],
+ [/\\textfrac\{5\}\{6\}/g, '\u215A'],
+ [/\\textfrac\{1\}\{8\}/g, '\u215B'],
+ [/\\textfrac\{3\}\{8\}/g, '\u215C'],
+ [/\\textfrac\{5\}\{8\}/g, '\u215D'],
+ [/\\textfrac\{7\}\{8\}/g, '\u215E'],
+ [/\\leftarrow /g, '\u2190'],
+ [/\\uparrow /g, '\u2191'],
+ [/\\rightarrow /g, '\u2192'],
+ [/\\downarrow /g, '\u2193'],
+ [/\\leftrightarrow /g, '\u2194'],
+ [/\\updownarrow /g, '\u2195'],
+ [/\\nwarrow /g, '\u2196'],
+ [/\\nearrow /g, '\u2197'],
+ [/\\searrow /g, '\u2198'],
+ [/\\swarrow /g, '\u2199'],
+ [/\\nleftarrow /g, '\u219A'],
+ [/\\nrightarrow /g, '\u219B'],
+ [/\\arrowwaveright /g, '\u219C'],
+ [/\\arrowwaveright /g, '\u219D'],
+ [/\\twoheadleftarrow /g, '\u219E'],
+ [/\\twoheadrightarrow /g, '\u21A0'],
+ [/\\leftarrowtail /g, '\u21A2'],
+ [/\\rightarrowtail /g, '\u21A3'],
+ [/\\mapsto /g, '\u21A6'],
+ [/\\hookleftarrow /g, '\u21A9'],
+ [/\\hookrightarrow /g, '\u21AA'],
+ [/\\looparrowleft /g, '\u21AB'],
+ [/\\looparrowright /g, '\u21AC'],
+ [/\\leftrightsquigarrow /g, '\u21AD'],
+ [/\\nleftrightarrow /g, '\u21AE'],
+ [/\\Lsh /g, '\u21B0'],
+ [/\\Rsh /g, '\u21B1'],
+ [/\\ElsevierGlyph\{21B3\}/g, '\u21B3'],
+ [/\\curvearrowleft /g, '\u21B6'],
+ [/\\curvearrowright /g, '\u21B7'],
+ [/\\circlearrowleft /g, '\u21BA'],
+ [/\\circlearrowright /g, '\u21BB'],
+ [/\\leftharpoonup /g, '\u21BC'],
+ [/\\leftharpoondown /g, '\u21BD'],
+ [/\\upharpoonright /g, '\u21BE'],
+ [/\\upharpoonleft /g, '\u21BF'],
+ [/\\rightharpoonup /g, '\u21C0'],
+ [/\\rightharpoondown /g, '\u21C1'],
+ [/\\downharpoonright /g, '\u21C2'],
+ [/\\downharpoonleft /g, '\u21C3'],
+ [/\\rightleftarrows /g, '\u21C4'],
+ [/\\dblarrowupdown /g, '\u21C5'],
+ [/\\leftrightarrows /g, '\u21C6'],
+ [/\\leftleftarrows /g, '\u21C7'],
+ [/\\upuparrows /g, '\u21C8'],
+ [/\\rightrightarrows /g, '\u21C9'],
+ [/\\downdownarrows /g, '\u21CA'],
+ [/\\leftrightharpoons /g, '\u21CB'],
+ [/\\rightleftharpoons /g, '\u21CC'],
+ [/\\nLeftarrow /g, '\u21CD'],
+ [/\\nLeftrightarrow /g, '\u21CE'],
+ [/\\nRightarrow /g, '\u21CF'],
+ [/\\Leftarrow /g, '\u21D0'],
+ [/\\Uparrow /g, '\u21D1'],
+ [/\\Rightarrow /g, '\u21D2'],
+ [/\\Downarrow /g, '\u21D3'],
+ [/\\Leftrightarrow /g, '\u21D4'],
+ [/\\Updownarrow /g, '\u21D5'],
+ [/\\Lleftarrow /g, '\u21DA'],
+ [/\\Rrightarrow /g, '\u21DB'],
+ [/\\rightsquigarrow /g, '\u21DD'],
+ [/\\DownArrowUpArrow /g, '\u21F5'],
+ [/\\forall /g, '\u2200'],
+ [/\\complement /g, '\u2201'],
+ [/\\partial /g, '\u2202'],
+ [/\\exists /g, '\u2203'],
+ [/\\nexists /g, '\u2204'],
+ [/\\varnothing /g, '\u2205'],
+ [/\\nabla /g, '\u2207'],
+ [/\\in /g, '\u2208'],
+ [/\\not\\in /g, '\u2209'],
+ [/\\ni /g, '\u220B'],
+ [/\\not\\ni /g, '\u220C'],
+ [/\\prod /g, '\u220F'],
+ [/\\coprod /g, '\u2210'],
+ [/\\sum /g, '\u2211'],
+ [/\\mp /g, '\u2213'],
+ [/\\dotplus /g, '\u2214'],
+ [/\\setminus /g, '\u2216'],
+ [/\\circ /g, '\u2218'],
+ [/\\bullet /g, '\u2219'],
+ [/\\surd /g, '\u221A'],
+ [/\\propto /g, '\u221D'],
+ [/\\infty /g, '\u221E'],
+ [/\\rightangle /g, '\u221F'],
+ [/\\angle /g, '\u2220'],
+ [/\\measuredangle /g, '\u2221'],
+ [/\\sphericalangle /g, '\u2222'],
+ [/\\mid /g, '\u2223'],
+ [/\\nmid /g, '\u2224'],
+ [/\\parallel /g, '\u2225'],
+ [/\\nparallel /g, '\u2226'],
+ [/\\wedge /g, '\u2227'],
+ [/\\vee /g, '\u2228'],
+ [/\\cap /g, '\u2229'],
+ [/\\cup /g, '\u222A'],
+ [/\\int /g, '\u222B'],
+ [/\\int\\!\\int /g, '\u222C'],
+ [/\\int\\!\\int\\!\\int /g, '\u222D'],
+ [/\\oint /g, '\u222E'],
+ [/\\surfintegral /g, '\u222F'],
+ [/\\volintegral /g, '\u2230'],
+ [/\\clwintegral /g, '\u2231'],
+ [/\\ElsevierGlyph\{2232\}/g, '\u2232'],
+ [/\\ElsevierGlyph\{2233\}/g, '\u2233'],
+ [/\\therefore /g, '\u2234'],
+ [/\\because /g, '\u2235'],
+ [/\\Colon /g, '\u2237'],
+ [/\\ElsevierGlyph\{2238\}/g, '\u2238'],
+ [/\\mathbin\{\{:\}\\!\\!\{\-\}\\!\\!\{:\}\}/g, '\u223A'],
+ [/\\homothetic /g, '\u223B'],
+ [/\\sim /g, '\u223C'],
+ [/\\backsim /g, '\u223D'],
+ [/\\lazysinv /g, '\u223E'],
+ [/\\wr /g, '\u2240'],
+ [/\\not\\sim /g, '\u2241'],
+ [/\\ElsevierGlyph\{2242\}/g, '\u2242'],
+ [/\\NotEqualTilde /g, '\u2242-00338'],
+ [/\\simeq /g, '\u2243'],
+ [/\\not\\simeq /g, '\u2244'],
+ [/\\cong /g, '\u2245'],
+ [/\\approxnotequal /g, '\u2246'],
+ [/\\not\\cong /g, '\u2247'],
+ [/\\approx /g, '\u2248'],
+ [/\\not\\approx /g, '\u2249'],
+ [/\\approxeq /g, '\u224A'],
+ [/\\tildetrpl /g, '\u224B'],
+ [/\\not\\apid /g, '\u224B-00338'],
+ [/\\allequal /g, '\u224C'],
+ [/\\asymp /g, '\u224D'],
+ [/\\Bumpeq /g, '\u224E'],
+ [/\\NotHumpDownHump /g, '\u224E-00338'],
+ [/\\bumpeq /g, '\u224F'],
+ [/\\NotHumpEqual /g, '\u224F-00338'],
+ [/\\doteq /g, '\u2250'],
+ [/\\not\\doteq/g, '\u2250-00338'],
+ [/\\doteqdot /g, '\u2251'],
+ [/\\fallingdotseq /g, '\u2252'],
+ [/\\risingdotseq /g, '\u2253'],
+ [/\\eqcirc /g, '\u2256'],
+ [/\\circeq /g, '\u2257'],
+ [/\\estimates /g, '\u2259'],
+ [/\\ElsevierGlyph\{225A\}/g, '\u225A'],
+ [/\\starequal /g, '\u225B'],
+ [/\\triangleq /g, '\u225C'],
+ [/\\ElsevierGlyph\{225F\}/g, '\u225F'],
+ [/\\not =/g, '\u2260'],
+ [/\\equiv /g, '\u2261'],
+ [/\\not\\equiv /g, '\u2262'],
+ [/\\leq /g, '\u2264'],
+ [/\\geq /g, '\u2265'],
+ [/\\leqq /g, '\u2266'],
+ [/\\geqq /g, '\u2267'],
+ [/\\lneqq /g, '\u2268'],
+ [/\\lvertneqq /g, '\u2268-0FE00'],
+ [/\\gneqq /g, '\u2269'],
+ [/\\gvertneqq /g, '\u2269-0FE00'],
+ [/\\ll /g, '\u226A'],
+ [/\\NotLessLess /g, '\u226A-00338'],
+ [/\\gg /g, '\u226B'],
+ [/\\NotGreaterGreater /g, '\u226B-00338'],
+ [/\\between /g, '\u226C'],
+ [/\\not\\kern\-0.3em\\times /g, '\u226D'],
+ [/\\not/g, '\u226F'],
+ [/\\not\\leq /g, '\u2270'],
+ [/\\not\\geq /g, '\u2271'],
+ [/\\lessequivlnt /g, '\u2272'],
+ [/\\greaterequivlnt /g, '\u2273'],
+ [/\\ElsevierGlyph\{2274\}/g, '\u2274'],
+ [/\\ElsevierGlyph\{2275\}/g, '\u2275'],
+ [/\\lessgtr /g, '\u2276'],
+ [/\\gtrless /g, '\u2277'],
+ [/\\notlessgreater /g, '\u2278'],
+ [/\\notgreaterless /g, '\u2279'],
+ [/\\prec /g, '\u227A'],
+ [/\\succ /g, '\u227B'],
+ [/\\preccurlyeq /g, '\u227C'],
+ [/\\succcurlyeq /g, '\u227D'],
+ [/\\precapprox /g, '\u227E'],
+ [/\\NotPrecedesTilde /g, '\u227E-00338'],
+ [/\\succapprox /g, '\u227F'],
+ [/\\NotSucceedsTilde /g, '\u227F-00338'],
+ [/\\not\\prec /g, '\u2280'],
+ [/\\not\\succ /g, '\u2281'],
+ [/\\subset /g, '\u2282'],
+ [/\\supset /g, '\u2283'],
+ [/\\not\\subset /g, '\u2284'],
+ [/\\not\\supset /g, '\u2285'],
+ [/\\subseteq /g, '\u2286'],
+ [/\\supseteq /g, '\u2287'],
+ [/\\not\\subseteq /g, '\u2288'],
+ [/\\not\\supseteq /g, '\u2289'],
+ [/\\subsetneq /g, '\u228A'],
+ [/\\varsubsetneqq /g, '\u228A-0FE00'],
+ [/\\supsetneq /g, '\u228B'],
+ [/\\varsupsetneq /g, '\u228B-0FE00'],
+ [/\\uplus /g, '\u228E'],
+ [/\\sqsubset /g, '\u228F'],
+ [/\\NotSquareSubset /g, '\u228F-00338'],
+ [/\\sqsupset /g, '\u2290'],
+ [/\\NotSquareSuperset /g, '\u2290-00338'],
+ [/\\sqsubseteq /g, '\u2291'],
+ [/\\sqsupseteq /g, '\u2292'],
+ [/\\sqcap /g, '\u2293'],
+ [/\\sqcup /g, '\u2294'],
+ [/\\oplus /g, '\u2295'],
+ [/\\ominus /g, '\u2296'],
+ [/\\otimes /g, '\u2297'],
+ [/\\oslash /g, '\u2298'],
+ [/\\odot /g, '\u2299'],
+ [/\\circledcirc /g, '\u229A'],
+ [/\\circledast /g, '\u229B'],
+ [/\\circleddash /g, '\u229D'],
+ [/\\boxplus /g, '\u229E'],
+ [/\\boxminus /g, '\u229F'],
+ [/\\boxtimes /g, '\u22A0'],
+ [/\\boxdot /g, '\u22A1'],
+ [/\\vdash /g, '\u22A2'],
+ [/\\dashv /g, '\u22A3'],
+ [/\\top /g, '\u22A4'],
+ [/\\perp /g, '\u22A5'],
+ [/\\truestate /g, '\u22A7'],
+ [/\\forcesextra /g, '\u22A8'],
+ [/\\Vdash /g, '\u22A9'],
+ [/\\Vvdash /g, '\u22AA'],
+ [/\\VDash /g, '\u22AB'],
+ [/\\nvdash /g, '\u22AC'],
+ [/\\nvDash /g, '\u22AD'],
+ [/\\nVdash /g, '\u22AE'],
+ [/\\nVDash /g, '\u22AF'],
+ [/\\vartriangleleft /g, '\u22B2'],
+ [/\\vartriangleright /g, '\u22B3'],
+ [/\\trianglelefteq /g, '\u22B4'],
+ [/\\trianglerighteq /g, '\u22B5'],
+ [/\\original /g, '\u22B6'],
+ [/\\image /g, '\u22B7'],
+ [/\\multimap /g, '\u22B8'],
+ [/\\hermitconjmatrix /g, '\u22B9'],
+ [/\\intercal /g, '\u22BA'],
+ [/\\veebar /g, '\u22BB'],
+ [/\\rightanglearc /g, '\u22BE'],
+ [/\\ElsevierGlyph\{22C0\}/g, '\u22C0'],
+ [/\\ElsevierGlyph\{22C1\}/g, '\u22C1'],
+ [/\\bigcap /g, '\u22C2'],
+ [/\\bigcup /g, '\u22C3'],
+ [/\\diamond /g, '\u22C4'],
+ [/\\cdot /g, '\u22C5'],
+ [/\\star /g, '\u22C6'],
+ [/\\divideontimes /g, '\u22C7'],
+ [/\\bowtie /g, '\u22C8'],
+ [/\\ltimes /g, '\u22C9'],
+ [/\\rtimes /g, '\u22CA'],
+ [/\\leftthreetimes /g, '\u22CB'],
+ [/\\rightthreetimes /g, '\u22CC'],
+ [/\\backsimeq /g, '\u22CD'],
+ [/\\curlyvee /g, '\u22CE'],
+ [/\\curlywedge /g, '\u22CF'],
+ [/\\Subset /g, '\u22D0'],
+ [/\\Supset /g, '\u22D1'],
+ [/\\Cap /g, '\u22D2'],
+ [/\\Cup /g, '\u22D3'],
+ [/\\pitchfork /g, '\u22D4'],
+ [/\\lessdot /g, '\u22D6'],
+ [/\\gtrdot /g, '\u22D7'],
+ [/\\verymuchless /g, '\u22D8'],
+ [/\\verymuchgreater /g, '\u22D9'],
+ [/\\lesseqgtr /g, '\u22DA'],
+ [/\\gtreqless /g, '\u22DB'],
+ [/\\curlyeqprec /g, '\u22DE'],
+ [/\\curlyeqsucc /g, '\u22DF'],
+ [/\\not\\sqsubseteq /g, '\u22E2'],
+ [/\\not\\sqsupseteq /g, '\u22E3'],
+ [/\\Elzsqspne /g, '\u22E5'],
+ [/\\lnsim /g, '\u22E6'],
+ [/\\gnsim /g, '\u22E7'],
+ [/\\precedesnotsimilar /g, '\u22E8'],
+ [/\\succnsim /g, '\u22E9'],
+ [/\\ntriangleleft /g, '\u22EA'],
+ [/\\ntriangleright /g, '\u22EB'],
+ [/\\ntrianglelefteq /g, '\u22EC'],
+ [/\\ntrianglerighteq /g, '\u22ED'],
+ [/\\vdots /g, '\u22EE'],
+ [/\\cdots /g, '\u22EF'],
+ [/\\upslopeellipsis /g, '\u22F0'],
+ [/\\downslopeellipsis /g, '\u22F1'],
+ [/\\barwedge /g, '\u2305'],
+ [/\\perspcorrespond /g, '\u2306'],
+ [/\\lceil /g, '\u2308'],
+ [/\\rceil /g, '\u2309'],
+ [/\\lfloor /g, '\u230A'],
+ [/\\rfloor /g, '\u230B'],
+ [/\\recorder /g, '\u2315'],
+ [/\\mathchar"2208/g, '\u2316'],
+ [/\\ulcorner /g, '\u231C'],
+ [/\\urcorner /g, '\u231D'],
+ [/\\llcorner /g, '\u231E'],
+ [/\\lrcorner /g, '\u231F'],
+ [/\\frown /g, '\u2322'],
+ [/\\smile /g, '\u2323'],
+ [/\\langle /g, '\u2329'],
+ [/\\rangle /g, '\u232A'],
+ [/\\ElsevierGlyph\{E838\}/g, '\u233D'],
+ [/\\Elzdlcorn /g, '\u23A3'],
+ [/\\lmoustache /g, '\u23B0'],
+ [/\\rmoustache /g, '\u23B1'],
+ [/\\textvisiblespace /g, '\u2423'],
+ [/\\ding\{172\}/g, '\u2460'],
+ [/\\ding\{173\}/g, '\u2461'],
+ [/\\ding\{174\}/g, '\u2462'],
+ [/\\ding\{175\}/g, '\u2463'],
+ [/\\ding\{176\}/g, '\u2464'],
+ [/\\ding\{177\}/g, '\u2465'],
+ [/\\ding\{178\}/g, '\u2466'],
+ [/\\ding\{179\}/g, '\u2467'],
+ [/\\ding\{180\}/g, '\u2468'],
+ [/\\ding\{181\}/g, '\u2469'],
+ [/\\circledS /g, '\u24C8'],
+ [/\\Elzdshfnc /g, '\u2506'],
+ [/\\Elzsqfnw /g, '\u2519'],
+ [/\\diagup /g, '\u2571'],
+ [/\\ding\{110\}/g, '\u25A0'],
+ [/\\square /g, '\u25A1'],
+ [/\\blacksquare /g, '\u25AA'],
+ [/\\fbox\{~~\}/g, '\u25AD'],
+ [/\\Elzvrecto /g, '\u25AF'],
+ [/\\ElsevierGlyph\{E381\}/g, '\u25B1'],
+ [/\\ding\{115\}/g, '\u25B2'],
+ [/\\bigtriangleup /g, '\u25B3'],
+ [/\\blacktriangle /g, '\u25B4'],
+ [/\\vartriangle /g, '\u25B5'],
+ [/\\blacktriangleright /g, '\u25B8'],
+ [/\\triangleright /g, '\u25B9'],
+ [/\\ding\{116\}/g, '\u25BC'],
+ [/\\bigtriangledown /g, '\u25BD'],
+ [/\\blacktriangledown /g, '\u25BE'],
+ [/\\triangledown /g, '\u25BF'],
+ [/\\blacktriangleleft /g, '\u25C2'],
+ [/\\triangleleft /g, '\u25C3'],
+ [/\\ding\{117\}/g, '\u25C6'],
+ [/\\lozenge /g, '\u25CA'],
+ [/\\bigcirc /g, '\u25CB'],
+ [/\\ding\{108\}/g, '\u25CF'],
+ [/\\Elzcirfl /g, '\u25D0'],
+ [/\\Elzcirfr /g, '\u25D1'],
+ [/\\Elzcirfb /g, '\u25D2'],
+ [/\\ding\{119\}/g, '\u25D7'],
+ [/\\Elzrvbull /g, '\u25D8'],
+ [/\\Elzsqfl /g, '\u25E7'],
+ [/\\Elzsqfr /g, '\u25E8'],
+ [/\\Elzsqfse /g, '\u25EA'],
+ [/\\bigcirc /g, '\u25EF'],
+ [/\\ding\{72\}/g, '\u2605'],
+ [/\\ding\{73\}/g, '\u2606'],
+ [/\\ding\{37\}/g, '\u260E'],
+ [/\\ding\{42\}/g, '\u261B'],
+ [/\\ding\{43\}/g, '\u261E'],
+ [/\\rightmoon /g, '\u263E'],
+ [/\\mercury /g, '\u263F'],
+ [/\\venus /g, '\u2640'],
+ [/\\male /g, '\u2642'],
+ [/\\jupiter /g, '\u2643'],
+ [/\\saturn /g, '\u2644'],
+ [/\\uranus /g, '\u2645'],
+ [/\\neptune /g, '\u2646'],
+ [/\\pluto /g, '\u2647'],
+ [/\\aries /g, '\u2648'],
+ [/\\taurus /g, '\u2649'],
+ [/\\gemini /g, '\u264A'],
+ [/\\cancer /g, '\u264B'],
+ [/\\leo /g, '\u264C'],
+ [/\\virgo /g, '\u264D'],
+ [/\\libra /g, '\u264E'],
+ [/\\scorpio /g, '\u264F'],
+ [/\\sagittarius /g, '\u2650'],
+ [/\\capricornus /g, '\u2651'],
+ [/\\aquarius /g, '\u2652'],
+ [/\\pisces /g, '\u2653'],
+ [/\\ding\{171\}/g, '\u2660'],
+ [/\\diamond /g, '\u2662'],
+ [/\\ding\{168\}/g, '\u2663'],
+ [/\\ding\{170\}/g, '\u2665'],
+ [/\\ding\{169\}/g, '\u2666'],
+ [/\\quarternote /g, '\u2669'],
+ [/\\eighthnote /g, '\u266A'],
+ [/\\flat /g, '\u266D'],
+ [/\\natural /g, '\u266E'],
+ [/\\sharp /g, '\u266F'],
+ [/\\ding\{33\}/g, '\u2701'],
+ [/\\ding\{34\}/g, '\u2702'],
+ [/\\ding\{35\}/g, '\u2703'],
+ [/\\ding\{36\}/g, '\u2704'],
+ [/\\ding\{38\}/g, '\u2706'],
+ [/\\ding\{39\}/g, '\u2707'],
+ [/\\ding\{40\}/g, '\u2708'],
+ [/\\ding\{41\}/g, '\u2709'],
+ [/\\ding\{44\}/g, '\u270C'],
+ [/\\ding\{45\}/g, '\u270D'],
+ [/\\ding\{46\}/g, '\u270E'],
+ [/\\ding\{47\}/g, '\u270F'],
+ [/\\ding\{48\}/g, '\u2710'],
+ [/\\ding\{49\}/g, '\u2711'],
+ [/\\ding\{50\}/g, '\u2712'],
+ [/\\ding\{51\}/g, '\u2713'],
+ [/\\ding\{52\}/g, '\u2714'],
+ [/\\ding\{53\}/g, '\u2715'],
+ [/\\ding\{54\}/g, '\u2716'],
+ [/\\ding\{55\}/g, '\u2717'],
+ [/\\ding\{56\}/g, '\u2718'],
+ [/\\ding\{57\}/g, '\u2719'],
+ [/\\ding\{58\}/g, '\u271A'],
+ [/\\ding\{59\}/g, '\u271B'],
+ [/\\ding\{60\}/g, '\u271C'],
+ [/\\ding\{61\}/g, '\u271D'],
+ [/\\ding\{62\}/g, '\u271E'],
+ [/\\ding\{63\}/g, '\u271F'],
+ [/\\ding\{64\}/g, '\u2720'],
+ [/\\ding\{65\}/g, '\u2721'],
+ [/\\ding\{66\}/g, '\u2722'],
+ [/\\ding\{67\}/g, '\u2723'],
+ [/\\ding\{68\}/g, '\u2724'],
+ [/\\ding\{69\}/g, '\u2725'],
+ [/\\ding\{70\}/g, '\u2726'],
+ [/\\ding\{71\}/g, '\u2727'],
+ [/\\ding\{73\}/g, '\u2729'],
+ [/\\ding\{74\}/g, '\u272A'],
+ [/\\ding\{75\}/g, '\u272B'],
+ [/\\ding\{76\}/g, '\u272C'],
+ [/\\ding\{77\}/g, '\u272D'],
+ [/\\ding\{78\}/g, '\u272E'],
+ [/\\ding\{79\}/g, '\u272F'],
+ [/\\ding\{80\}/g, '\u2730'],
+ [/\\ding\{81\}/g, '\u2731'],
+ [/\\ding\{82\}/g, '\u2732'],
+ [/\\ding\{83\}/g, '\u2733'],
+ [/\\ding\{84\}/g, '\u2734'],
+ [/\\ding\{85\}/g, '\u2735'],
+ [/\\ding\{86\}/g, '\u2736'],
+ [/\\ding\{87\}/g, '\u2737'],
+ [/\\ding\{88\}/g, '\u2738'],
+ [/\\ding\{89\}/g, '\u2739'],
+ [/\\ding\{90\}/g, '\u273A'],
+ [/\\ding\{91\}/g, '\u273B'],
+ [/\\ding\{92\}/g, '\u273C'],
+ [/\\ding\{93\}/g, '\u273D'],
+ [/\\ding\{94\}/g, '\u273E'],
+ [/\\ding\{95\}/g, '\u273F'],
+ [/\\ding\{96\}/g, '\u2740'],
+ [/\\ding\{97\}/g, '\u2741'],
+ [/\\ding\{98\}/g, '\u2742'],
+ [/\\ding\{99\}/g, '\u2743'],
+ [/\\ding\{100\}/g, '\u2744'],
+ [/\\ding\{101\}/g, '\u2745'],
+ [/\\ding\{102\}/g, '\u2746'],
+ [/\\ding\{103\}/g, '\u2747'],
+ [/\\ding\{104\}/g, '\u2748'],
+ [/\\ding\{105\}/g, '\u2749'],
+ [/\\ding\{106\}/g, '\u274A'],
+ [/\\ding\{107\}/g, '\u274B'],
+ [/\\ding\{109\}/g, '\u274D'],
+ [/\\ding\{111\}/g, '\u274F'],
+ [/\\ding\{112\}/g, '\u2750'],
+ [/\\ding\{113\}/g, '\u2751'],
+ [/\\ding\{114\}/g, '\u2752'],
+ [/\\ding\{118\}/g, '\u2756'],
+ [/\\ding\{120\}/g, '\u2758'],
+ [/\\ding\{121\}/g, '\u2759'],
+ [/\\ding\{122\}/g, '\u275A'],
+ [/\\ding\{123\}/g, '\u275B'],
+ [/\\ding\{124\}/g, '\u275C'],
+ [/\\ding\{125\}/g, '\u275D'],
+ [/\\ding\{126\}/g, '\u275E'],
+ [/\\ding\{161\}/g, '\u2761'],
+ [/\\ding\{162\}/g, '\u2762'],
+ [/\\ding\{163\}/g, '\u2763'],
+ [/\\ding\{164\}/g, '\u2764'],
+ [/\\ding\{165\}/g, '\u2765'],
+ [/\\ding\{166\}/g, '\u2766'],
+ [/\\ding\{167\}/g, '\u2767'],
+ [/\\ding\{182\}/g, '\u2776'],
+ [/\\ding\{183\}/g, '\u2777'],
+ [/\\ding\{184\}/g, '\u2778'],
+ [/\\ding\{185\}/g, '\u2779'],
+ [/\\ding\{186\}/g, '\u277A'],
+ [/\\ding\{187\}/g, '\u277B'],
+ [/\\ding\{188\}/g, '\u277C'],
+ [/\\ding\{189\}/g, '\u277D'],
+ [/\\ding\{190\}/g, '\u277E'],
+ [/\\ding\{191\}/g, '\u277F'],
+ [/\\ding\{192\}/g, '\u2780'],
+ [/\\ding\{193\}/g, '\u2781'],
+ [/\\ding\{194\}/g, '\u2782'],
+ [/\\ding\{195\}/g, '\u2783'],
+ [/\\ding\{196\}/g, '\u2784'],
+ [/\\ding\{197\}/g, '\u2785'],
+ [/\\ding\{198\}/g, '\u2786'],
+ [/\\ding\{199\}/g, '\u2787'],
+ [/\\ding\{200\}/g, '\u2788'],
+ [/\\ding\{201\}/g, '\u2789'],
+ [/\\ding\{202\}/g, '\u278A'],
+ [/\\ding\{203\}/g, '\u278B'],
+ [/\\ding\{204\}/g, '\u278C'],
+ [/\\ding\{205\}/g, '\u278D'],
+ [/\\ding\{206\}/g, '\u278E'],
+ [/\\ding\{207\}/g, '\u278F'],
+ [/\\ding\{208\}/g, '\u2790'],
+ [/\\ding\{209\}/g, '\u2791'],
+ [/\\ding\{210\}/g, '\u2792'],
+ [/\\ding\{211\}/g, '\u2793'],
+ [/\\ding\{212\}/g, '\u2794'],
+ [/\\ding\{216\}/g, '\u2798'],
+ [/\\ding\{217\}/g, '\u2799'],
+ [/\\ding\{218\}/g, '\u279A'],
+ [/\\ding\{219\}/g, '\u279B'],
+ [/\\ding\{220\}/g, '\u279C'],
+ [/\\ding\{221\}/g, '\u279D'],
+ [/\\ding\{222\}/g, '\u279E'],
+ [/\\ding\{223\}/g, '\u279F'],
+ [/\\ding\{224\}/g, '\u27A0'],
+ [/\\ding\{225\}/g, '\u27A1'],
+ [/\\ding\{226\}/g, '\u27A2'],
+ [/\\ding\{227\}/g, '\u27A3'],
+ [/\\ding\{228\}/g, '\u27A4'],
+ [/\\ding\{229\}/g, '\u27A5'],
+ [/\\ding\{230\}/g, '\u27A6'],
+ [/\\ding\{231\}/g, '\u27A7'],
+ [/\\ding\{232\}/g, '\u27A8'],
+ [/\\ding\{233\}/g, '\u27A9'],
+ [/\\ding\{234\}/g, '\u27AA'],
+ [/\\ding\{235\}/g, '\u27AB'],
+ [/\\ding\{236\}/g, '\u27AC'],
+ [/\\ding\{237\}/g, '\u27AD'],
+ [/\\ding\{238\}/g, '\u27AE'],
+ [/\\ding\{239\}/g, '\u27AF'],
+ [/\\ding\{241\}/g, '\u27B1'],
+ [/\\ding\{242\}/g, '\u27B2'],
+ [/\\ding\{243\}/g, '\u27B3'],
+ [/\\ding\{244\}/g, '\u27B4'],
+ [/\\ding\{245\}/g, '\u27B5'],
+ [/\\ding\{246\}/g, '\u27B6'],
+ [/\\ding\{247\}/g, '\u27B7'],
+ [/\\ding\{248\}/g, '\u27B8'],
+ [/\\ding\{249\}/g, '\u27B9'],
+ [/\\ding\{250\}/g, '\u27BA'],
+ [/\\ding\{251\}/g, '\u27BB'],
+ [/\\ding\{252\}/g, '\u27BC'],
+ [/\\ding\{253\}/g, '\u27BD'],
+ [/\\ding\{254\}/g, '\u27BE'],
+ [/\\longleftarrow /g, '\u27F5'],
+ [/\\longrightarrow /g, '\u27F6'],
+ [/\\longleftrightarrow /g, '\u27F7'],
+ [/\\Longleftarrow /g, '\u27F8'],
+ [/\\Longrightarrow /g, '\u27F9'],
+ [/\\Longleftrightarrow /g, '\u27FA'],
+ [/\\longmapsto /g, '\u27FC'],
+ [/\\sim\\joinrel\\leadsto/g, '\u27FF'],
+ [/\\ElsevierGlyph\{E212\}/g, '\u2905'],
+ [/\\UpArrowBar /g, '\u2912'],
+ [/\\DownArrowBar /g, '\u2913'],
+ [/\\ElsevierGlyph\{E20C\}/g, '\u2923'],
+ [/\\ElsevierGlyph\{E20D\}/g, '\u2924'],
+ [/\\ElsevierGlyph\{E20B\}/g, '\u2925'],
+ [/\\ElsevierGlyph\{E20A\}/g, '\u2926'],
+ [/\\ElsevierGlyph\{E211\}/g, '\u2927'],
+ [/\\ElsevierGlyph\{E20E\}/g, '\u2928'],
+ [/\\ElsevierGlyph\{E20F\}/g, '\u2929'],
+ [/\\ElsevierGlyph\{E210\}/g, '\u292A'],
+ [/\\ElsevierGlyph\{E21C\}/g, '\u2933'],
+ [/\\ElsevierGlyph\{E21D\}/g, '\u2933-00338'],
+ [/\\ElsevierGlyph\{E21A\}/g, '\u2936'],
+ [/\\ElsevierGlyph\{E219\}/g, '\u2937'],
+ [/\\Elolarr /g, '\u2940'],
+ [/\\Elorarr /g, '\u2941'],
+ [/\\ElzRlarr /g, '\u2942'],
+ [/\\ElzrLarr /g, '\u2944'],
+ [/\\Elzrarrx /g, '\u2947'],
+ [/\\LeftRightVector /g, '\u294E'],
+ [/\\RightUpDownVector /g, '\u294F'],
+ [/\\DownLeftRightVector /g, '\u2950'],
+ [/\\LeftUpDownVector /g, '\u2951'],
+ [/\\LeftVectorBar /g, '\u2952'],
+ [/\\RightVectorBar /g, '\u2953'],
+ [/\\RightUpVectorBar /g, '\u2954'],
+ [/\\RightDownVectorBar /g, '\u2955'],
+ [/\\DownLeftVectorBar /g, '\u2956'],
+ [/\\DownRightVectorBar /g, '\u2957'],
+ [/\\LeftUpVectorBar /g, '\u2958'],
+ [/\\LeftDownVectorBar /g, '\u2959'],
+ [/\\LeftTeeVector /g, '\u295A'],
+ [/\\RightTeeVector /g, '\u295B'],
+ [/\\RightUpTeeVector /g, '\u295C'],
+ [/\\RightDownTeeVector /g, '\u295D'],
+ [/\\DownLeftTeeVector /g, '\u295E'],
+ [/\\DownRightTeeVector /g, '\u295F'],
+ [/\\LeftUpTeeVector /g, '\u2960'],
+ [/\\LeftDownTeeVector /g, '\u2961'],
+ [/\\UpEquilibrium /g, '\u296E'],
+ [/\\ReverseUpEquilibrium /g, '\u296F'],
+ [/\\RoundImplies /g, '\u2970'],
+ [/\\ElsevierGlyph\{E214\}/g, '\u297C'],
+ [/\\ElsevierGlyph\{E215\}/g, '\u297D'],
+ [/\\Elztfnc /g, '\u2980'],
+ [/\\ElsevierGlyph\{3018\}/g, '\u2985'],
+ [/\\Elroang /g, '\u2986'],
+ [/\\ElsevierGlyph\{E291\}/g, '\u2994'],
+ [/\\Elzddfnc /g, '\u2999'],
+ [/\\Angle /g, '\u299C'],
+ [/\\Elzlpargt /g, '\u29A0'],
+ [/\\ElsevierGlyph\{E260\}/g, '\u29B5'],
+ [/\\ElsevierGlyph\{E61B\}/g, '\u29B6'],
+ [/\\ElzLap /g, '\u29CA'],
+ [/\\Elzdefas /g, '\u29CB'],
+ [/\\LeftTriangleBar /g, '\u29CF'],
+ [/\\NotLeftTriangleBar /g, '\u29CF-00338'],
+ [/\\RightTriangleBar /g, '\u29D0'],
+ [/\\NotRightTriangleBar /g, '\u29D0-00338'],
+ [/\\ElsevierGlyph\{E372\}/g, '\u29DC'],
+ [/\\blacklozenge /g, '\u29EB'],
+ [/\\RuleDelayed /g, '\u29F4'],
+ [/\\Elxuplus /g, '\u2A04'],
+ [/\\ElzThr /g, '\u2A05'],
+ [/\\Elxsqcup /g, '\u2A06'],
+ [/\\ElzInf /g, '\u2A07'],
+ [/\\ElzSup /g, '\u2A08'],
+ [/\\ElzCint /g, '\u2A0D'],
+ [/\\clockoint /g, '\u2A0F'],
+ [/\\ElsevierGlyph\{E395\}/g, '\u2A10'],
+ [/\\sqrint /g, '\u2A16'],
+ [/\\ElsevierGlyph\{E25A\}/g, '\u2A25'],
+ [/\\ElsevierGlyph\{E25B\}/g, '\u2A2A'],
+ [/\\ElsevierGlyph\{E25C\}/g, '\u2A2D'],
+ [/\\ElsevierGlyph\{E25D\}/g, '\u2A2E'],
+ [/\\ElzTimes /g, '\u2A2F'],
+ [/\\ElsevierGlyph\{E25E\}/g, '\u2A34'],
+ [/\\ElsevierGlyph\{E25E\}/g, '\u2A35'],
+ [/\\ElsevierGlyph\{E259\}/g, '\u2A3C'],
+ [/\\amalg /g, '\u2A3F'],
+ [/\\ElzAnd /g, '\u2A53'],
+ [/\\ElzOr /g, '\u2A54'],
+ [/\\ElsevierGlyph\{E36E\}/g, '\u2A55'],
+ [/\\ElOr /g, '\u2A56'],
+ [/\\perspcorrespond /g, '\u2A5E'],
+ [/\\Elzminhat /g, '\u2A5F'],
+ [/\\ElsevierGlyph\{225A\}/g, '\u2A63'],
+ [/\\stackrel\{*\}\{=\}/g, '\u2A6E'],
+ [/\\Equal /g, '\u2A75'],
+ [/\\leqslant /g, '\u2A7D'],
+ [/\\nleqslant /g, '\u2A7D-00338'],
+ [/\\geqslant /g, '\u2A7E'],
+ [/\\ngeqslant /g, '\u2A7E-00338'],
+ [/\\lessapprox /g, '\u2A85'],
+ [/\\gtrapprox /g, '\u2A86'],
+ [/\\lneq /g, '\u2A87'],
+ [/\\gneq /g, '\u2A88'],
+ [/\\lnapprox /g, '\u2A89'],
+ [/\\gnapprox /g, '\u2A8A'],
+ [/\\lesseqqgtr /g, '\u2A8B'],
+ [/\\gtreqqless /g, '\u2A8C'],
+ [/\\eqslantless /g, '\u2A95'],
+ [/\\eqslantgtr /g, '\u2A96'],
+ [/\\Pisymbol\{ppi020\}\{117\}/g, '\u2A9D'],
+ [/\\Pisymbol\{ppi020\}\{105\}/g, '\u2A9E'],
+ [/\\NestedLessLess /g, '\u2AA1'],
+ [/\\NotNestedLessLess /g, '\u2AA1-00338'],
+ [/\\NestedGreaterGreater /g, '\u2AA2'],
+ [/\\NotNestedGreaterGreater /g, '\u2AA2-00338'],
+ [/\\preceq /g, '\u2AAF'],
+ [/\\not\\preceq /g, '\u2AAF-00338'],
+ [/\\succeq /g, '\u2AB0'],
+ [/\\not\\succeq /g, '\u2AB0-00338'],
+ [/\\precneqq /g, '\u2AB5'],
+ [/\\succneqq /g, '\u2AB6'],
+ [/\\precapprox /g, '\u2AB7'],
+ [/\\succapprox /g, '\u2AB8'],
+ [/\\precnapprox /g, '\u2AB9'],
+ [/\\succnapprox /g, '\u2ABA'],
+ [/\\subseteqq /g, '\u2AC5'],
+ [/\\nsubseteqq /g, '\u2AC5-00338'],
+ [/\\supseteqq /g, '\u2AC6'],
+ [/\\nsupseteqq/g, '\u2AC6-00338'],
+ [/\\subsetneqq /g, '\u2ACB'],
+ [/\\supsetneqq /g, '\u2ACC'],
+ [/\\ElsevierGlyph\{E30D\}/g, '\u2AEB'],
+ [/\\Elztdcol /g, '\u2AF6'],
+ [/\\ElsevierGlyph\{300A\}/g, '\u300A'],
+ [/\\ElsevierGlyph\{300B\}/g, '\u300B'],
+ [/\\ElsevierGlyph\{3018\}/g, '\u3018'],
+ [/\\ElsevierGlyph\{3019\}/g, '\u3019'],
+ [/\\openbracketleft /g, '\u301A'],
+ [/\\openbracketright /g, '\u301B'],
+]
+
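+// Export as an ES module default and, when a CommonJS module object is
+// present, also via module.exports so either module system can load the parser.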
+export default BibtexParser
+if (typeof module !== 'undefined' && module.exports) {
+ module.exports = BibtexParser
+}
diff --git a/services/references/buildscript.txt b/services/references/buildscript.txt
new file mode 100644
index 0000000000..05771cd85a
--- /dev/null
+++ b/services/references/buildscript.txt
@@ -0,0 +1,9 @@
+references
+--dependencies=mongo
+--docker-repos=us-east1-docker.pkg.dev/overleaf-ops/ol-docker
+--env-add=
+--env-pass-through=
+--esmock-loader=True
+--node-version=20.18.2
+--public-repo=False
+--script-version=4.5.0
diff --git a/services/references/config/settings.defaults.cjs b/services/references/config/settings.defaults.cjs
new file mode 100644
index 0000000000..2551f99f09
--- /dev/null
+++ b/services/references/config/settings.defaults.cjs
@@ -0,0 +1,9 @@
+module.exports = {
+ internal: {
+ references: {
+ port: 3056,
+ host: process.env.REFERENCES_HOST || '127.0.0.1',
+ },
+ },
+}
+
diff --git a/services/references/docker-compose.ci.yml b/services/references/docker-compose.ci.yml
new file mode 100644
index 0000000000..51eb64d126
--- /dev/null
+++ b/services/references/docker-compose.ci.yml
@@ -0,0 +1,52 @@
+# This file was auto-generated, do not edit it directly.
+# Instead run bin/update_build_scripts from
+# https://github.com/overleaf/internal/
+
+version: "2.3"
+
+services:
+ test_unit:
+ image: ci/$PROJECT_NAME:$BRANCH_NAME-$BUILD_NUMBER
+ user: node
+ command: npm run test:unit:_run
+ environment:
+ NODE_ENV: test
+ NODE_OPTIONS: "--unhandled-rejections=strict"
+
+
+ test_acceptance:
+ build: .
+ image: ci/$PROJECT_NAME:$BRANCH_NAME-$BUILD_NUMBER
+ environment:
+ ELASTIC_SEARCH_DSN: es:9200
+ MONGO_HOST: mongo
+ POSTGRES_HOST: postgres
+ MOCHA_GREP: ${MOCHA_GREP}
+ NODE_ENV: test
+ NODE_OPTIONS: "--unhandled-rejections=strict"
+ depends_on:
+ mongo:
+ condition: service_started
+ user: node
+ command: npm run test:acceptance
+
+
+ tar:
+ build: .
+ image: ci/$PROJECT_NAME:$BRANCH_NAME-$BUILD_NUMBER
+ volumes:
+ - ./:/tmp/build/
+ command: tar -czf /tmp/build/build.tar.gz --exclude=build.tar.gz --exclude-vcs .
+ user: root
+ mongo:
+ image: mongo:6.0.13
+ command: --replSet overleaf
+ volumes:
+ - ../../bin/shared/mongodb-init-replica-set.js:/docker-entrypoint-initdb.d/mongodb-init-replica-set.js
+ environment:
+ MONGO_INITDB_DATABASE: sharelatex
+ extra_hosts:
+ # Required when using the automatic database setup for initializing the
+ # replica set. This override is not needed when running the setup after
+ # starting up mongo.
+ - mongo:127.0.0.1
diff --git a/services/references/docker-compose.yml b/services/references/docker-compose.yml
new file mode 100644
index 0000000000..ad71431768
--- /dev/null
+++ b/services/references/docker-compose.yml
@@ -0,0 +1,56 @@
+# This file was auto-generated, do not edit it directly.
+# Instead run bin/update_build_scripts from
+# https://github.com/overleaf/internal/
+
+version: "2.3"
+
+services:
+ test_unit:
+ image: node:20.18.2
+ volumes:
+ - .:/overleaf/services/references
+ - ../../node_modules:/overleaf/node_modules
+ - ../../libraries:/overleaf/libraries
+ working_dir: /overleaf/services/references
+ environment:
+ MOCHA_GREP: ${MOCHA_GREP}
+ LOG_LEVEL: ${LOG_LEVEL:-}
+ NODE_ENV: test
+ NODE_OPTIONS: "--unhandled-rejections=strict"
+ command: npm run --silent test:unit
+ user: node
+
+ test_acceptance:
+ image: node:20.18.2
+ volumes:
+ - .:/overleaf/services/references
+ - ../../node_modules:/overleaf/node_modules
+ - ../../libraries:/overleaf/libraries
+ working_dir: /overleaf/services/references
+ environment:
+ ELASTIC_SEARCH_DSN: es:9200
+ MONGO_HOST: mongo
+ POSTGRES_HOST: postgres
+ MOCHA_GREP: ${MOCHA_GREP}
+ LOG_LEVEL: ${LOG_LEVEL:-}
+ NODE_ENV: test
+ NODE_OPTIONS: "--unhandled-rejections=strict"
+ user: node
+ depends_on:
+ mongo:
+ condition: service_started
+ command: npm run --silent test:acceptance
+
+ mongo:
+ image: mongo:6.0.13
+ command: --replSet overleaf
+ volumes:
+ - ../../bin/shared/mongodb-init-replica-set.js:/docker-entrypoint-initdb.d/mongodb-init-replica-set.js
+ environment:
+ MONGO_INITDB_DATABASE: sharelatex
+ extra_hosts:
+ # Required when using the automatic database setup for initializing the
+ # replica set. This override is not needed when running the setup after
+ # starting up mongo.
+ - mongo:127.0.0.1
+
diff --git a/services/references/package.json b/services/references/package.json
new file mode 100644
index 0000000000..9b0988e7ac
--- /dev/null
+++ b/services/references/package.json
@@ -0,0 +1,26 @@
+{
+ "name": "@overleaf/references",
+ "description": "An API for providing citation-keys",
+ "private": true,
+ "type": "module",
+ "main": "app.js",
+ "scripts": {
+ "start": "node app.js"
+ },
+ "version": "0.1.0",
+ "dependencies": {
+ "@overleaf/settings": "*",
+ "@overleaf/logger": "*",
+ "@overleaf/metrics": "*",
+ "async": "^3.2.5",
+ "express": "^4.21.2"
+ },
+ "devDependencies": {
+ "chai": "^4.3.6",
+ "chai-as-promised": "^7.1.1",
+ "esmock": "^2.6.9",
+ "mocha": "^11.1.0",
+ "sinon": "^9.2.4",
+ "typescript": "^5.0.4"
+ }
+}
diff --git a/services/references/tsconfig.json b/services/references/tsconfig.json
new file mode 100644
index 0000000000..d3fdd3022a
--- /dev/null
+++ b/services/references/tsconfig.json
@@ -0,0 +1,12 @@
+{
+ "extends": "../../tsconfig.backend.json",
+ "include": [
+ "app.js",
+ "app/js/**/*",
+ "benchmarks/**/*",
+ "config/**/*",
+ "scripts/**/*",
+ "test/**/*",
+ "types"
+ ]
+}
diff --git a/services/web/.eslintrc.js b/services/web/.eslintrc.js
index 193313152c..7dd154c942 100644
--- a/services/web/.eslintrc.js
+++ b/services/web/.eslintrc.js
@@ -1,3 +1,7 @@
+const _ = require('lodash')
+const confusingBrowserGlobals = require('confusing-browser-globals')
+const globals = require('globals')
+
module.exports = {
root: true,
parser: '@typescript-eslint/parser',
@@ -19,6 +23,7 @@ module.exports = {
},
rules: {
'no-constant-binary-expression': 'error',
+ 'no-restricted-globals': ['error', ...confusingBrowserGlobals],
// do not allow importing of implicit dependencies.
'import/no-extraneous-dependencies': 'error',
@@ -39,6 +44,12 @@ module.exports = {
'error',
{ functions: false, classes: false, variables: false },
],
+ 'react-hooks/exhaustive-deps': [
+ 'warn',
+ {
+ additionalHooks: '(useCommandProvider)',
+ },
+ ],
},
overrides: [
// NOTE: changing paths may require updating them in the Makefile too.
@@ -58,6 +69,10 @@ module.exports = {
{
// Test specific rules
files: ['**/test/**/*.*'],
+ excludedFiles: [
+ '**/test/unit/src/**/*.test.mjs',
+ 'test/unit/vitest_bootstrap.mjs',
+ ], // exclude vitest files
plugins: ['mocha', 'chai-expect', 'chai-friendly'],
env: {
mocha: true,
@@ -89,6 +104,30 @@ module.exports = {
'@typescript-eslint/no-unused-expressions': 'off',
},
},
+ {
+ files: [
+ '**/test/unit/src/**/*.test.mjs',
+ 'test/unit/vitest_bootstrap.mjs',
+ ],
+ env: {
+ jest: true, // best match for vitest API etc.
+ },
+ plugins: ['@vitest', 'chai-expect', 'chai-friendly'], // still using chai for now
+ rules: {
+ // vitest-specific rules
+ '@vitest/no-focused-tests': 'error',
+ '@vitest/no-disabled-tests': 'error',
+
+ // Swap the no-unused-expressions rule with a more chai-friendly one
+ 'no-unused-expressions': 'off',
+ 'chai-friendly/no-unused-expressions': 'error',
+
+ // chai-specific rules
+ 'chai-expect/missing-assertion': 'error',
+ 'chai-expect/terminating-properties': 'error',
+ '@typescript-eslint/no-unused-expressions': 'off',
+ },
+ },
{
// ES specific rules
files: [
@@ -107,7 +146,14 @@ module.exports = {
},
plugins: ['unicorn'],
rules: {
- 'import/no-unresolved': 'error',
+ 'import/no-unresolved': [
+ 'error',
+ {
+ // eslint-plugin-import does not support exports directive in package.json
+ // https://github.com/import-js/eslint-plugin-import/issues/1810
+ ignore: ['^p-queue$'],
+ },
+ ],
'import/extensions': [
'error',
'ignorePackages',
@@ -218,6 +264,24 @@ module.exports = {
],
extends: ['plugin:cypress/recommended'],
},
+ {
+ // Frontend test specific rules
+ files: ['**/frontend/**/*.test.{js,jsx,ts,tsx}'],
+ plugins: ['testing-library'],
+ extends: ['plugin:testing-library/react'],
+ rules: {
+ 'testing-library/no-await-sync-events': 'off',
+ 'testing-library/no-await-sync-queries': 'off',
+ 'testing-library/no-container': 'off',
+ 'testing-library/no-node-access': 'off',
+ 'testing-library/no-render-in-lifecycle': 'off',
+ 'testing-library/no-wait-for-multiple-assertions': 'off',
+ 'testing-library/no-wait-for-side-effects': 'off',
+ 'testing-library/prefer-query-by-disappearance': 'off',
+ 'testing-library/prefer-screen-queries': 'off',
+ 'testing-library/render-result-naming-convention': 'off',
+ },
+ },
{
// Frontend specific rules
files: [
@@ -244,7 +308,6 @@ module.exports = {
globals: {
__webpack_public_path__: true,
$: true,
- angular: true,
ga: true,
},
rules: {
@@ -325,6 +388,18 @@ module.exports = {
'Modify location via customLocalStorage instead of calling window.localStorage methods directly',
},
],
+ 'no-unused-vars': 'off',
+ '@typescript-eslint/no-unused-vars': [
+ 'error',
+ {
+ args: 'after-used',
+ argsIgnorePattern: '^_',
+ ignoreRestSiblings: false,
+ caughtErrors: 'none',
+ vars: 'all',
+ varsIgnorePattern: '^_',
+ },
+ ],
},
},
{
@@ -461,5 +536,17 @@ module.exports = {
'no-console': 'error',
},
},
+ {
+ files: ['**/*.worker.{js,ts}'],
+ rules: {
+ 'no-restricted-globals': [
+ 'error',
+ ..._.difference(
+ Object.keys({ ...globals.browser, ...globals.node }),
+ Object.keys(globals.worker)
+ ),
+ ],
+ },
+ },
],
}
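For reference, the new **/*.worker.{js,ts} override computes its restricted-globals list instead of hard-coding one: any global known to the browser or Node environments but absent from the dedicated-worker environment is banned inside worker files. A minimal standalone sketch of that computation, using the same lodash and globals packages the config already imports:

    // Reproduce the worker override's restricted-globals computation.
    const _ = require('lodash')
    const globals = require('globals')

    // Globals available in the browser or Node environments...
    const browserAndNode = Object.keys({ ...globals.browser, ...globals.node })
    // ...minus those that also exist inside a dedicated worker.
    const restricted = _.difference(browserAndNode, Object.keys(globals.worker))

    console.log(restricted.includes('document')) // true  (browser-only)
    console.log(restricted.includes('process'))  // true  (Node-only)
    console.log(restricted.includes('self'))     // false (valid in a worker)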
diff --git a/services/web/.gitignore b/services/web/.gitignore
index 8bd23b7f0a..9946f23ae6 100644
--- a/services/web/.gitignore
+++ b/services/web/.gitignore
@@ -1,51 +1,6 @@
-# Compiled source #
-###################
-*.com
-*.class
-*.dll
-*.exe
-*.o
-*.so
-
-# Packages #
-############
-# it's better to unpack these files and commit the raw source
-# git has its own built in compression methods
-*.7z
-*.dmg
-*.gz
-*.iso
-*.jar
-*.rar
-*.tar
-*.zip
-
-# Logs and databases #
-######################
-*.log
-*.sql
-*.sqlite
-
-# OS generated files #
-######################
-.DS_Store?
-ehthumbs.db
-Icon?
-Thumbs.db
-
-# allow "icons"
-![Ii]cons
-
-node_modules/*
data/*
coverage
-cookies.txt
-requestQueueWorker.js
-TpdsWorker.js
-BackgroundJobsWorker.js
-UserAndProjectPopulator.coffee
-
public/manifest.json
public/js
@@ -54,22 +9,6 @@ public/stylesheets
public/fonts
public/images
-Gemfile.lock
-
-*.swp
-.DS_Store
-
-docker-shared.yml
-
-config/*.coffee
-!config/settings.defaults.coffee
-!config/settings.webpack.coffee
-config/*.js
-!config/settings.defaults.js
-!config/settings.webpack.js
-!config/settings.overrides.saas.js
-!config/settings.overrides.server-pro.js
-
modules/**/Makefile
# Precompiled pug files
@@ -78,13 +17,6 @@ modules/**/Makefile
# Sentry secrets file (injected by CI)
.sentryclirc
-# via dev-environment
-.npmrc
-
-# Intellij
-.idea
-.run
-
# Cypress
cypress/screenshots/
cypress/videos/
diff --git a/services/web/.nvmrc b/services/web/.nvmrc
index 2a393af592..fc37597bcc 100644
--- a/services/web/.nvmrc
+++ b/services/web/.nvmrc
@@ -1 +1 @@
-20.18.0
+22.17.0
diff --git a/services/web/.prettierignore b/services/web/.prettierignore
index f4be187b87..2e8db8b35b 100644
--- a/services/web/.prettierignore
+++ b/services/web/.prettierignore
@@ -6,9 +6,35 @@ frontend/js/vendor
modules/**/frontend/js/vendor
public/js
public/minjs
+frontend/stylesheets/bootstrap-5/modules/metrics/nvd3.scss
frontend/stylesheets/components/nvd3.less
frontend/js/features/source-editor/lezer-latex/latex.mjs
frontend/js/features/source-editor/lezer-latex/latex.terms.mjs
frontend/js/features/source-editor/lezer-bibtex/bibtex.mjs
frontend/js/features/source-editor/lezer-bibtex/bibtex.terms.mjs
frontend/js/features/source-editor/hunspell/wasm/hunspell.mjs
+
+# complex pages
+app/views/project/editor.pug
+app/views/project/editor/**
+modules/open-in-overleaf/app/views/documentation.pug
+modules/references-search/app/views/project/editor/**
+modules/rich-text/app/views/toolbar.pug
+
+# loops
+app/views/referal/bonus.pug
+modules/templates/app/views/tag.pug
+
+# expressions that could not be formatted correctly
+app/views/_mixins/faq_search.pug
+app/views/external/home/v2.pug
+app/views/project/token/access.pug
+app/views/user/primaryEmailCheck.pug
+app/views/user/restricted.pug
+modules/admin-panel/app/views/project/show.pug
+modules/templates/app/views/project/editor/_left-menu.pug
+modules/two-factor-authentication/app/views/_mixins.pug
+
+# minified files
+app/views/_google_analytics.pug
+app/views/_customer_io.pug
diff --git a/services/web/.prettierrc b/services/web/.prettierrc
index 13e31862ff..b99212a874 100644
--- a/services/web/.prettierrc
+++ b/services/web/.prettierrc
@@ -1,9 +1,23 @@
{
"arrowParens": "avoid",
"jsxSingleQuote": false,
+ "pugAttributeSeparator": "as-needed",
+ "pugBracketSpacing": false,
+ "pugClassNotation": "as-is",
+ "pugIdNotation": "as-is",
+ "pugSortAttributesBeginning": ["name", "data-type"],
+ "plugins": ["@prettier/plugin-pug"],
"semi": false,
"singleQuote": true,
"trailingComma": "es5",
"tabWidth": 2,
- "useTabs": false
+ "useTabs": false,
+ "overrides": [
+ {
+ "files": "*.pug",
+ "options": {
+ "useTabs": true
+ }
+ }
+ ]
}
diff --git a/services/web/.storybook/preview.tsx b/services/web/.storybook/preview.tsx
index 8707cde5f3..c5d50da27a 100644
--- a/services/web/.storybook/preview.tsx
+++ b/services/web/.storybook/preview.tsx
@@ -1,7 +1,7 @@
import type { Preview } from '@storybook/react'
// Storybook does not (currently) support async loading of "stories". Therefore
-// the strategy in frontend/js/i18n.js does not work (because we cannot wait on
+// the strategy in frontend/js/i18n.ts does not work (because we cannot wait on
// the promise to resolve).
// Therefore we have to use the synchronous method for configuring
// react-i18next. Because of this, we can only hard-code a single language.
@@ -9,14 +9,16 @@ import i18n from 'i18next'
import { initReactI18next } from 'react-i18next'
// @ts-ignore
import en from '../../../services/web/locales/en.json'
-import { bootstrapVersionArg } from './utils/with-bootstrap-switcher'
-function resetMeta(bootstrapVersion?: 3 | 5) {
+function resetMeta() {
window.metaAttributesCache = new Map()
window.metaAttributesCache.set('ol-i18n', { currentLangCode: 'en' })
- if (bootstrapVersion) {
- window.metaAttributesCache.set('ol-bootstrapVersion', bootstrapVersion)
- }
+ window.metaAttributesCache.set('ol-projectHistoryBlobsEnabled', true)
+ window.metaAttributesCache.set('ol-capabilities', ['chat'])
+ window.metaAttributesCache.set('ol-compileSettings', {
+ reducedTimeoutWarning: 'default',
+ compileTimeout: 20,
+ })
window.metaAttributesCache.set('ol-ExposedSettings', {
adminEmail: 'placeholder@example.com',
appName: 'Overleaf',
@@ -126,8 +128,12 @@ const preview: Preview = {
// render stories in iframes, to isolate modals
inlineStories: false,
},
- // Default to Bootstrap 3 styles
- bootstrap5: false,
+ options: {
+ storySort: {
+ method: 'alphabetical',
+ order: ['Shared'],
+ },
+ },
},
globalTypes: {
theme: {
@@ -139,50 +145,35 @@ const preview: Preview = {
items: [
{ value: 'main-', title: 'Default' },
{ value: 'main-light-', title: 'Light' },
- { value: 'main-ieee-', title: 'IEEE' },
],
},
},
},
loaders: [
- async ({ globals }) => {
- const { theme } = globals
-
+ async () => {
return {
- // NOTE: this uses `${theme}style.less` rather than `${theme}.less`
- // so that webpack only bundles files ending with "style.less"
- bootstrap3Style: await import(
- `!!to-string-loader!css-loader!less-loader!../../../services/web/frontend/stylesheets/${theme}style.less`
- ),
- // NOTE: this uses `${theme}style.scss` rather than `${theme}.scss`
- // so that webpack only bundles files ending with "style.scss"
- bootstrap5Style: await import(
- `!!to-string-loader!css-loader!resolve-url-loader!sass-loader!../../../services/web/frontend/stylesheets/bootstrap-5/${theme}style.scss`
+ mainStyle: await import(
+ // @ts-ignore
+ `!!to-string-loader!css-loader!resolve-url-loader!sass-loader!../../../services/web/frontend/stylesheets/bootstrap-5/main-style.scss`
),
}
},
],
decorators: [
(Story, context) => {
- const { bootstrap3Style, bootstrap5Style } = context.loaded
- const bootstrapVersion = Number(
- context.args[bootstrapVersionArg] ||
- (context.parameters.bootstrap5 ? 5 : 3)
- ) as 3 | 5
- const activeStyle =
- bootstrapVersion === 5 ? bootstrap5Style : bootstrap3Style
+ const { mainStyle } = context.loaded
- resetMeta(bootstrapVersion)
+ resetMeta()
return (
- <>
- {activeStyle && }
-
- >
+
+ {mainStyle && }
+
+
)
},
],
diff --git a/services/web/.storybook/utils/with-bootstrap-switcher.tsx b/services/web/.storybook/utils/with-bootstrap-switcher.tsx
deleted file mode 100644
index 2d6b6a5609..0000000000
--- a/services/web/.storybook/utils/with-bootstrap-switcher.tsx
+++ /dev/null
@@ -1,20 +0,0 @@
-import { Meta } from '@storybook/react'
-
-export const bootstrapVersionArg = 'bootstrapVersion'
-
-export const bsVersionDecorator: Meta = {
- argTypes: {
- [bootstrapVersionArg]: {
- name: 'Bootstrap Version',
- description: 'Bootstrap version for components',
- control: { type: 'inline-radio' },
- options: ['3', '5'],
- table: {
- defaultValue: { summary: '3' },
- },
- },
- },
- args: {
- [bootstrapVersionArg]: '3',
- },
-}
diff --git a/services/web/.storybook/utils/with-split-tests.tsx b/services/web/.storybook/utils/with-split-tests.tsx
index fa8b4bad52..0a7ee0bd72 100644
--- a/services/web/.storybook/utils/with-split-tests.tsx
+++ b/services/web/.storybook/utils/with-split-tests.tsx
@@ -3,11 +3,8 @@ import _ from 'lodash'
import { SplitTestContext } from '../../frontend/js/shared/context/split-test-context'
export const splitTestsArgTypes = {
- 'local-ccy-format-v2': {
- description: 'Use local currency formatting',
- control: { type: 'radio' as const },
- options: ['default', 'enabled'],
- },
+ // to be able to use this utility, you need to add the argTypes for each split test in this object
+ // Check the original implementation for an example: https://github.com/overleaf/internal/pull/17809
}
export const withSplitTests = (
diff --git a/services/web/Dockerfile b/services/web/Dockerfile
index 9f8ce157d6..5e58ab7ec2 100644
--- a/services/web/Dockerfile
+++ b/services/web/Dockerfile
@@ -1,6 +1,6 @@
# the base image is suitable for running web with /overleaf/services/web bind
# mounted
-FROM node:20.18.0 AS base
+FROM node:22.17.0 AS base
WORKDIR /overleaf/services/web
@@ -52,12 +52,12 @@ USER node
# the webpack image has deps+src+webpack artifacts
FROM dev AS webpack
USER root
-RUN OVERLEAF_CONFIG=/overleaf/services/web/config/settings.webpack.js npm run webpack:production
+RUN OVERLEAF_CONFIG=/overleaf/services/web/config/settings.webpack.js nice npm run webpack:production
# intermediate image for removing source maps ahead of copying into final production image
FROM webpack AS webpack-no-sourcemaps
-RUN find /overleaf/services/web/public -name '*.js.map' -delete
+RUN nice find /overleaf/services/web/public -name '*.js.map' -delete
# copy source code and precompile pug images
@@ -65,7 +65,7 @@ FROM deps-prod AS pug
COPY services/web /overleaf/services/web
# Omit Server Pro/CE specific scripts from SaaS image
RUN rm /overleaf/services/web/modules/server-ce-scripts -rf
-RUN OVERLEAF_CONFIG=/overleaf/services/web/config/settings.overrides.saas.js npm run precompile-pug
+RUN OVERLEAF_CONFIG=/overleaf/services/web/config/settings.overrides.saas.js nice npm run precompile-pug
# the web image with only production dependencies but no webpack production build, for development
diff --git a/services/web/Dockerfile.frontend b/services/web/Dockerfile.frontend
index 50620e6f26..6922653908 100644
--- a/services/web/Dockerfile.frontend
+++ b/services/web/Dockerfile.frontend
@@ -1,4 +1,4 @@
-FROM node:20.18.0
+FROM node:22.17.0
# Install Google Chrome
RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add -
diff --git a/services/web/Dockerfile.frontend_ct b/services/web/Dockerfile.frontend_ct
deleted file mode 100644
index 49fb58843c..0000000000
--- a/services/web/Dockerfile.frontend_ct
+++ /dev/null
@@ -1,9 +0,0 @@
-ARG PROJECT_NAME
-ARG BRANCH_NAME
-ARG BUILD_NUMBER
-ARG CYPRESS_IMAGE
-
-FROM ci/$PROJECT_NAME:$BRANCH_NAME-$BUILD_NUMBER AS dev
-
-FROM $CYPRESS_IMAGE
-COPY --from=dev /overleaf /overleaf
diff --git a/services/web/Makefile b/services/web/Makefile
index 995302dd24..f5a7542691 100644
--- a/services/web/Makefile
+++ b/services/web/Makefile
@@ -4,6 +4,7 @@ BUILD_NUMBER ?= local
BRANCH_NAME ?= $(shell git rev-parse --abbrev-ref HEAD)
PROJECT_NAME = web
BUILD_DIR_NAME = $(shell pwd | xargs basename | tr -cd '[a-zA-Z0-9_.\-]')
+PWD = $(shell pwd)
export OVERLEAF_CONFIG ?= /overleaf/services/web/test/acceptance/config/settings.test.saas.js
export BASE_CONFIG ?= ${OVERLEAF_CONFIG}
@@ -45,7 +46,7 @@ clean:
-COMPOSE_PROJECT_NAME=acceptance_modules_merged_saas_4_$(BUILD_DIR_NAME) $(DOCKER_COMPOSE) down --rmi local
-COMPOSE_PROJECT_NAME=acceptance_modules_merged_server_ce_$(BUILD_DIR_NAME) $(DOCKER_COMPOSE) down --rmi local
-COMPOSE_PROJECT_NAME=acceptance_modules_merged_server_pro_$(BUILD_DIR_NAME) $(DOCKER_COMPOSE) down --rmi local
- -COMPOSE_PROJECT_NAME=frontend_test_$(BUILD_DIR_NAME) $(DOCKER_COMPOSE) down --rmi local
+ -COMPOSE_PROJECT_NAME=test_frontend_ct_$(BUILD_DIR_NAME) $(DOCKER_COMPOSE) down --rmi local
-COMPOSE_PROJECT_NAME=tar_$(BUILD_DIR_NAME) $(DOCKER_COMPOSE) down --rmi local
clean_ci:
@@ -82,6 +83,21 @@ test_unit_app:
$(DOCKER_COMPOSE) run --name unit_test_$(BUILD_DIR_NAME) --rm test_unit
$(DOCKER_COMPOSE) down -v -t 0
+test_unit_mocha: export COMPOSE_PROJECT_NAME=unit_test_mocha_$(BUILD_DIR_NAME)
+test_unit_mocha:
+ $(DOCKER_COMPOSE) run --rm test_unit npm run test:unit:mocha
+ $(DOCKER_COMPOSE) down -v -t 0
+
+test_unit_esm: export COMPOSE_PROJECT_NAME=unit_test_esm_$(BUILD_DIR_NAME)
+test_unit_esm:
+ $(DOCKER_COMPOSE) run --rm test_unit npm run test:unit:esm
+ $(DOCKER_COMPOSE) down -v -t 0
+
+test_unit_esm_watch: export COMPOSE_PROJECT_NAME=unit_test_esm_watch_$(BUILD_DIR_NAME)
+test_unit_esm_watch:
+ $(DOCKER_COMPOSE) run --rm test_unit npm run test:unit:esm:watch
+ $(DOCKER_COMPOSE) down -v -t 0
+
TEST_SUITES = $(sort $(filter-out \
$(wildcard test/unit/src/helpers/*), \
$(wildcard test/unit/src/*/*)))
@@ -120,39 +136,47 @@ test_unit_module:
#
test_frontend:
- COMPOSE_PROJECT_NAME=frontend_test_$(BUILD_DIR_NAME) $(DOCKER_COMPOSE) down -v -t 0
COMPOSE_PROJECT_NAME=frontend_test_$(BUILD_DIR_NAME) $(DOCKER_COMPOSE) run --rm test_frontend
COMPOSE_PROJECT_NAME=frontend_test_$(BUILD_DIR_NAME) $(DOCKER_COMPOSE) down -v -t 0
#
# Frontend component tests in Cypress
#
-
-test_frontend_ct:
- COMPOSE_PROJECT_NAME=frontend_test_ct_$(BUILD_DIR_NAME) $(DOCKER_COMPOSE) down -v -t 0
- COMPOSE_PROJECT_NAME=frontend_test_ct_$(BUILD_DIR_NAME) $(DOCKER_COMPOSE) run --rm test_frontend_ct
- COMPOSE_PROJECT_NAME=frontend_test_ct_$(BUILD_DIR_NAME) $(DOCKER_COMPOSE) down -v -t 0
+# Local development: use $ make test_frontend_ct
+#
+TEST_FRONTEND_CT_VARIANTS = \
+ test_frontend_ct \
+ test_frontend_ct_core_other \
+ test_frontend_ct_core_features \
+ test_frontend_ct_modules \
+ test_frontend_ct_editor_other \
+ test_frontend_ct_editor_visual \
# Note: The below cypress targets are for CI only
build_test_frontend_ct:
- COMPOSE_PROJECT_NAME=frontend_test_ct_$(BUILD_DIR_NAME) $(DOCKER_COMPOSE) build test_frontend_ct
- # Note: The 2nd build should use the cache from the 1st build.
- COMPOSE_PROJECT_NAME=frontend_test_ct_editor_$(BUILD_DIR_NAME) $(DOCKER_COMPOSE) build test_frontend_ct
+ docker run --rm --volume /dev/shm:/dev/shm --user root $(IMAGE_CI) bash -ec 'tar -cC / overleaf | tar -xC /dev/shm'
-test_frontend_ct_core: export CYPRESS_RESULTS=./cypress/results/core
-test_frontend_ct_core: export CYPRESS_SPEC_PATTERN=./{test,modules/**/test}/frontend/**/*.spec.{js,jsx,ts,tsx}
-test_frontend_ct_core: export CYPRESS_EXCLUDE_SPEC_PATTERN=./test/frontend/features/source-editor/**/*.spec.{js,jsx,ts,tsx}
-test_frontend_ct_core:
- COMPOSE_PROJECT_NAME=frontend_test_ct_$(BUILD_DIR_NAME) $(DOCKER_COMPOSE) down -v -t 0
- COMPOSE_PROJECT_NAME=frontend_test_ct_$(BUILD_DIR_NAME) $(DOCKER_COMPOSE) run --rm test_frontend_ct
- COMPOSE_PROJECT_NAME=frontend_test_ct_$(BUILD_DIR_NAME) $(DOCKER_COMPOSE) down -v -t 0
+test_frontend_ct_core_other: export CYPRESS_RESULTS=./cypress/results/core
+test_frontend_ct_core_other: export CYPRESS_SPEC_PATTERN=./test/frontend/**/*.spec.{js,jsx,ts,tsx}
+test_frontend_ct_core_other: export CYPRESS_EXCLUDE_SPEC_PATTERN=./test/frontend/features/**/*.spec.{js,jsx,ts,tsx}
-test_frontend_ct_editor: export CYPRESS_RESULTS=./cypress/results/editor
-test_frontend_ct_editor: export CYPRESS_SPEC_PATTERN=./test/frontend/features/source-editor/**/*.spec.{js,jsx,ts,tsx}
-test_frontend_ct_editor:
- COMPOSE_PROJECT_NAME=frontend_test_ct_editor_$(BUILD_DIR_NAME) $(DOCKER_COMPOSE) down -v -t 0
- COMPOSE_PROJECT_NAME=frontend_test_ct_editor_$(BUILD_DIR_NAME) $(DOCKER_COMPOSE) run --rm test_frontend_ct
- COMPOSE_PROJECT_NAME=frontend_test_ct_editor_$(BUILD_DIR_NAME) $(DOCKER_COMPOSE) down -v -t 0
+test_frontend_ct_core_features: export CYPRESS_RESULTS=./cypress/results/core
+test_frontend_ct_core_features: export CYPRESS_SPEC_PATTERN=./test/frontend/features/**/*.spec.{js,jsx,ts,tsx}
+test_frontend_ct_core_features: export CYPRESS_EXCLUDE_SPEC_PATTERN=./test/frontend/features/source-editor/**/*.spec.{js,jsx,ts,tsx}
+
+test_frontend_ct_modules: export CYPRESS_RESULTS=./cypress/results/modules
+test_frontend_ct_modules: export CYPRESS_SPEC_PATTERN=./modules/**/test/frontend/**/*.spec.{js,jsx,ts,tsx}
+
+test_frontend_ct_editor_other: export CYPRESS_RESULTS=./cypress/results/editor_other
+test_frontend_ct_editor_other: export CYPRESS_SPEC_PATTERN=./test/frontend/features/source-editor/**/*.spec.{js,jsx,ts,tsx}
+test_frontend_ct_editor_other: export CYPRESS_EXCLUDE_SPEC_PATTERN=./test/frontend/features/source-editor/components/codemirror-editor-visual*.spec.{js,jsx,ts,tsx}
+
+test_frontend_ct_editor_visual: export CYPRESS_RESULTS=./cypress/results/editor_visual
+test_frontend_ct_editor_visual: export CYPRESS_SPEC_PATTERN=./test/frontend/features/source-editor/components/codemirror-editor-visual*.spec.{js,jsx,ts,tsx}
+
+$(TEST_FRONTEND_CT_VARIANTS):
+ COMPOSE_PROJECT_NAME=$@_$(BUILD_DIR_NAME) $(DOCKER_COMPOSE) run --rm test_frontend_ct
+ COMPOSE_PROJECT_NAME=$@_$(BUILD_DIR_NAME) $(DOCKER_COMPOSE) down -v -t 0
#
# Acceptance tests
@@ -431,15 +455,40 @@ format: format_styles
format_styles:
npm run --silent format:styles
+format: format_pug
+format_pug:
+ npm run --silent format:pug
+
format_fix:
npm run --silent format:fix
format_styles_fix:
npm run --silent format:styles:fix
+format_pug_fix:
+ npm run --silent format:pug:fix
+
format_in_docker:
$(RUN_LINT_FORMAT) make format -j2 --output-sync
+SHELLCHECK_OPTS = \
+ --shell=bash \
+ --external-sources
+SHELLCHECK_COLOR := $(if $(CI),--color=never,--color)
+SHELLCHECK_FILES := { git ls-files "*.sh" -z; git grep -Plz "\A\#\!.*bash"; } | sort -zu
+
+shellcheck:
+ @$(SHELLCHECK_FILES) | xargs -0 -r docker run --rm -v $(PWD):/mnt -w /mnt \
+ koalaman/shellcheck:stable $(SHELLCHECK_OPTS) $(SHELLCHECK_COLOR)
+
+shellcheck_fix:
+ @$(SHELLCHECK_FILES) | while IFS= read -r -d '' file; do \
+ diff=$$(docker run --rm -v $(PWD):/mnt -w /mnt koalaman/shellcheck:stable $(SHELLCHECK_OPTS) --format=diff "$$file" 2>/dev/null); \
+ if [ -n "$$diff" ] && ! echo "$$diff" | patch -p1 >/dev/null 2>&1; then echo "\033[31m$$file\033[0m"; \
+ elif [ -n "$$diff" ]; then echo "$$file"; \
+ else echo "\033[2m$$file\033[0m"; fi \
+ done
+
#
# Build & publish
#
@@ -540,4 +589,5 @@ $(MODULE_TARGETS):
test_unit_modules test_unit_module test_frontend \
test_acceptance test_acceptance_app test_acceptance_modules \
test_acceptance_module ci format format_fix lint \
+ shellcheck shellcheck_fix \
build publish tar
diff --git a/services/web/app.mjs b/services/web/app.mjs
index 528f2c079b..3f54cc36a8 100644
--- a/services/web/app.mjs
+++ b/services/web/app.mjs
@@ -1,5 +1,5 @@
// Metrics must be initialized before importing anything else
-import '@overleaf/metrics/initialize.js'
+import { metricsModuleImportStartTime } from '@overleaf/metrics/initialize.js'
import Modules from './app/src/infrastructure/Modules.js'
import metrics from '@overleaf/metrics'
@@ -18,6 +18,14 @@ import mongoose from './app/src/infrastructure/Mongoose.js'
import { triggerGracefulShutdown } from './app/src/infrastructure/GracefulShutdown.js'
import FileWriter from './app/src/infrastructure/FileWriter.js'
import { fileURLToPath } from 'node:url'
+import Features from './app/src/infrastructure/Features.js'
+
+metrics.gauge(
+ 'web_startup',
+ performance.now() - metricsModuleImportStartTime,
+ 1,
+ { path: 'imports' }
+)
logger.initialize(process.env.METRICS_APP_NAME || 'web')
logger.logger.serializers.user = Serializers.user
@@ -48,6 +56,32 @@ if (Settings.catchErrors) {
// Create ./data/dumpFolder if needed
FileWriter.ensureDumpFolderExists()
+// Validate combination of feature flags.
+Features.validateSettings()
+
+// handle SIGTERM for graceful shutdown in kubernetes
+process.on('SIGTERM', function (signal) {
+ triggerGracefulShutdown(Server.server, signal)
+})
+
+const beforeWaitForMongoAndGlobalBlobs = performance.now()
+try {
+ await Promise.all([
+ mongodb.connectionPromise,
+ mongoose.connectionPromise,
+ HistoryManager.loadGlobalBlobsPromise,
+ ])
+} catch (err) {
+ logger.fatal({ err }, 'Cannot connect to mongo. Exiting.')
+ process.exit(1)
+}
+metrics.gauge(
+ 'web_startup',
+ performance.now() - beforeWaitForMongoAndGlobalBlobs,
+ 1,
+ { path: 'waitForMongoAndGlobalBlobs' }
+)
+
const port = Settings.port || Settings.internal.web.port || 3000
const host = Settings.internal.web.host || '127.0.0.1'
if (process.argv[1] === fileURLToPath(import.meta.url)) {
@@ -59,42 +93,33 @@ if (process.argv[1] === fileURLToPath(import.meta.url)) {
PlansLocator.ensurePlansAreSetupCorrectly()
- Promise.all([
- mongodb.connectionPromise,
- mongoose.connectionPromise,
- HistoryManager.promises.loadGlobalBlobs(),
- ])
- .then(async () => {
- Server.server.listen(port, host, function () {
- logger.debug(`web starting up, listening on ${host}:${port}`)
- logger.debug(`${http.globalAgent.maxSockets} sockets enabled`)
- // wait until the process is ready before monitoring the event loop
- metrics.event_loop.monitor(logger)
- })
- QueueWorkers.start()
- await Modules.start()
- })
- .catch(err => {
- logger.fatal({ err }, 'Cannot connect to mongo. Exiting.')
- process.exit(1)
- })
+ Server.server.listen(port, host, function () {
+ logger.debug(`web starting up, listening on ${host}:${port}`)
+ logger.debug(`${http.globalAgent.maxSockets} sockets enabled`)
+ // wait until the process is ready before monitoring the event loop
+ metrics.event_loop.monitor(logger)
+
+ // Record metrics for the total startup time before listening on HTTP.
+ metrics.gauge(
+ 'web_startup',
+ performance.now() - metricsModuleImportStartTime,
+ 1,
+ { path: 'metricsModuleImportToHTTPListen' }
+ )
+ })
+ try {
+ QueueWorkers.start()
+ } catch (err) {
+ logger.fatal({ err }, 'failed to start queue processing')
+ }
+ try {
+ await Modules.start()
+ } catch (err) {
+ logger.fatal({ err }, 'failed to start web module background jobs')
+ }
}
// initialise site admin tasks
-Promise.all([
- mongodb.connectionPromise,
- mongoose.connectionPromise,
- HistoryManager.promises.loadGlobalBlobs(),
-])
- .then(() => SiteAdminHandler.initialise())
- .catch(err => {
- logger.fatal({ err }, 'Cannot connect to mongo. Exiting.')
- process.exit(1)
- })
-
-// handle SIGTERM for graceful shutdown in kubernetes
-process.on('SIGTERM', function (signal) {
- triggerGracefulShutdown(Server.server, signal)
-})
+SiteAdminHandler.initialise()
export default Server.server
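All of the startup gauges added above report under a single web_startup metric and are told apart by a path label: time spent on module imports, time spent waiting for Mongo and the global blobs, and the total from first import to the HTTP listen callback. A self-contained illustration of the same pattern in an .mjs script; the report helper stands in for metrics.gauge('web_startup', value, 1, { path }) and the backend wait is simulated:

    import { performance } from 'node:perf_hooks'

    // Captured as early as possible, mirroring metricsModuleImportStartTime.
    const importStartTime = performance.now()

    function report(path, ms) {
      // app.mjs reports this via metrics.gauge('web_startup', ms, 1, { path })
      console.log(`web_startup{path="${path}"} ${ms.toFixed(1)}ms`)
    }

    // Stand-in for awaiting the mongodb/mongoose/global-blob promises.
    const beforeWait = performance.now()
    await new Promise(resolve => setTimeout(resolve, 50))

    report('imports', beforeWait - importStartTime)
    report('waitForMongoAndGlobalBlobs', performance.now() - beforeWait)
    report('metricsModuleImportToHTTPListen', performance.now() - importStartTime)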
diff --git a/services/web/app/src/Features/Analytics/AccountMappingHelper.js b/services/web/app/src/Features/Analytics/AccountMappingHelper.js
new file mode 100644
index 0000000000..5967149b8e
--- /dev/null
+++ b/services/web/app/src/Features/Analytics/AccountMappingHelper.js
@@ -0,0 +1,97 @@
+const mappings = new Map([
+ ['salesforce_id', generateSubscriptionToSalesforceMapping],
+ ['v1_id', generateSubscriptionToV1Mapping],
+ ['recurlySubscription_id', generateSubscriptionToRecurlyMapping],
+])
+
+/**
+ * @typedef {(import('./types.d.ts').AccountMapping)} AccountMapping
+ */
+
+/**
+ *
+ * @param {Object} subscription
+ * @param {Object} updatedSubscription
+ * @return {Array<AccountMapping>}
+ */
+function extractAccountMappingsFromSubscription(
+ subscription,
+ updatedSubscription
+) {
+ const accountMappings = []
+ mappings.forEach((generateMapping, param) => {
+ if (updatedSubscription[param] || updatedSubscription[param] === '') {
+ if (subscription[param] !== updatedSubscription[param]) {
+ accountMappings.push(
+ generateMapping(subscription.id, updatedSubscription[param])
+ )
+ }
+ }
+ })
+ return accountMappings
+}
+
+function generateV1Mapping(v1Id, salesforceId, createdAt) {
+ return {
+ source: 'salesforce',
+ sourceEntity: 'account',
+ sourceEntityId: salesforceId,
+ target: 'v1',
+ targetEntity: 'university',
+ targetEntityId: v1Id,
+ createdAt,
+ }
+}
+
+function generateSubscriptionToV1Mapping(subscriptionId, v1Id) {
+ return {
+ source: 'v1',
+ sourceEntity: 'university',
+ sourceEntityId: v1Id,
+ target: 'v2',
+ targetEntity: 'subscription',
+ targetEntityId: subscriptionId,
+ createdAt: new Date().toISOString(),
+ }
+}
+
+function generateSubscriptionToSalesforceMapping(subscriptionId, salesforceId) {
+ return {
+ source: 'salesforce',
+ sourceEntity: 'account',
+ sourceEntityId: salesforceId,
+ target: 'v2',
+ targetEntity: 'subscription',
+ targetEntityId: subscriptionId,
+ createdAt: new Date().toISOString(),
+ }
+}
+
+/**
+ *
+ * @param {string} subscriptionId
+ * @param {string} recurlyId
+ * @param {string} [createdAt] - Should be an ISO date
+ * @return {AccountMapping}
+ */
+function generateSubscriptionToRecurlyMapping(
+ subscriptionId,
+ recurlyId,
+ createdAt = new Date().toISOString()
+) {
+ return {
+ source: 'recurly',
+ sourceEntity: 'subscription',
+ sourceEntityId: recurlyId,
+ target: 'v2',
+ targetEntity: 'subscription',
+ targetEntityId: subscriptionId,
+ createdAt,
+ }
+}
+
+module.exports = {
+ extractAccountMappingsFromSubscription,
+ generateV1Mapping,
+ generateSubscriptionToRecurlyMapping,
+}
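A usage sketch for the table-driven helper above (all IDs are placeholders): a mapping is produced only for parameters that are present on the update, empty strings included, and that differ from the stored subscription.

    const AccountMappingHelper = require('./AccountMappingHelper')

    const subscription = { id: 'sub_123', salesforce_id: '001abc' }
    const updatedSubscription = {
      salesforce_id: '001xyz', // changed -> salesforce mapping
      recurlySubscription_id: 'rec_42', // newly set -> recurly mapping
      // v1_id omitted -> no v1 mapping
    }

    const mappings = AccountMappingHelper.extractAccountMappingsFromSubscription(
      subscription,
      updatedSubscription
    )
    console.log(
      mappings.map(m => `${m.source}:${m.sourceEntityId} -> ${m.targetEntityId}`)
    )
    // [ 'salesforce:001xyz -> sub_123', 'recurly:rec_42 -> sub_123' ]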
diff --git a/services/web/app/src/Features/Analytics/AccountMappingHelper.mjs b/services/web/app/src/Features/Analytics/AccountMappingHelper.mjs
deleted file mode 100644
index 76e4bc51bc..0000000000
--- a/services/web/app/src/Features/Analytics/AccountMappingHelper.mjs
+++ /dev/null
@@ -1,71 +0,0 @@
-export function extractAccountMappingsFromSubscription(
- subscription,
- updatedSubscription
-) {
- const accountMappings = []
- if (
- updatedSubscription.salesforce_id ||
- updatedSubscription.salesforce_id === ''
- ) {
- if (subscription.salesforce_id !== updatedSubscription.salesforce_id) {
- accountMappings.push(
- generateSubscriptionToSalesforceMapping(
- subscription.id,
- updatedSubscription.salesforce_id
- )
- )
- }
- }
- if (updatedSubscription.v1_id || updatedSubscription.v1_id === '') {
- if (subscription.v1_id !== updatedSubscription.v1_id) {
- accountMappings.push(
- generateSubscriptionToV1Mapping(
- subscription.id,
- updatedSubscription.v1_id
- )
- )
- }
- }
- return accountMappings
-}
-
-export function generateV1Mapping(v1Id, salesforceId, createdAt) {
- return {
- source: 'salesforce',
- sourceEntity: 'account',
- sourceEntityId: salesforceId,
- target: 'v1',
- targetEntity: 'university',
- targetEntityId: v1Id,
- createdAt,
- }
-}
-
-function generateSubscriptionToV1Mapping(subscriptionId, v1Id) {
- return {
- source: 'v1',
- sourceEntity: 'university',
- sourceEntityId: v1Id,
- target: 'v2',
- targetEntity: 'subscription',
- targetEntityId: subscriptionId,
- createdAt: new Date().toISOString(),
- }
-}
-
-function generateSubscriptionToSalesforceMapping(subscriptionId, salesforceId) {
- return {
- source: 'salesforce',
- sourceEntity: 'account',
- sourceEntityId: salesforceId,
- target: 'v2',
- targetEntity: 'subscription',
- targetEntityId: subscriptionId,
- createdAt: new Date().toISOString(),
- }
-}
-
-export default {
- extractAccountMappingsFromSubscription,
- generateV1Mapping,
-}
diff --git a/services/web/app/src/Features/Analytics/AnalyticsController.mjs b/services/web/app/src/Features/Analytics/AnalyticsController.mjs
index 8ae54518b9..7d9188f64e 100644
--- a/services/web/app/src/Features/Analytics/AnalyticsController.mjs
+++ b/services/web/app/src/Features/Analytics/AnalyticsController.mjs
@@ -4,7 +4,7 @@ import SessionManager from '../Authentication/SessionManager.js'
import GeoIpLookup from '../../infrastructure/GeoIpLookup.js'
import Features from '../../infrastructure/Features.js'
import { expressify } from '@overleaf/promise-utils'
-import { generateV1Mapping } from './AccountMappingHelper.mjs'
+import AccountMappingHelper from './AccountMappingHelper.js'
async function registerSalesforceMapping(req, res, next) {
if (!Features.hasFeature('analytics')) {
@@ -12,7 +12,7 @@ async function registerSalesforceMapping(req, res, next) {
}
const { createdAt, salesforceId, v1Id } = req.body
AnalyticsManager.registerAccountMapping(
- generateV1Mapping(v1Id, salesforceId, createdAt)
+ AccountMappingHelper.generateV1Mapping(v1Id, salesforceId, createdAt)
)
res.sendStatus(202)
}
diff --git a/services/web/app/src/Features/Analytics/AnalyticsManager.js b/services/web/app/src/Features/Analytics/AnalyticsManager.js
index 0a1cd32e55..4afdb08f83 100644
--- a/services/web/app/src/Features/Analytics/AnalyticsManager.js
+++ b/services/web/app/src/Features/Analytics/AnalyticsManager.js
@@ -146,16 +146,14 @@ function setUserPropertyForSessionInBackground(session, property, value) {
})
}
+/**
+ * @typedef {(import('./types').AccountMapping)} AccountMapping
+ */
+
/**
* Register mapping between two accounts.
*
- * @param {object} payload - The event payload to send to Analytics
- * @param {string} payload.source - The type of account linked from
- * @param {string} payload.sourceId - The ID of the account linked from
- * @param {string} payload.target - The type of account linked to
- * @param {string} payload.targetId - The ID of the account linked to
- * @param {Date} payload.createdAt - The date the mapping was created
- * @property
+ * @param {AccountMapping} payload - The event payload to send to Analytics
*/
function registerAccountMapping({
source,
diff --git a/services/web/app/src/Features/Analytics/types.d.ts b/services/web/app/src/Features/Analytics/types.d.ts
new file mode 100644
index 0000000000..44df848bd0
--- /dev/null
+++ b/services/web/app/src/Features/Analytics/types.d.ts
@@ -0,0 +1,9 @@
+export type AccountMapping = {
+ source: string
+ sourceEntity: string
+ sourceEntityId: string
+ target: string
+ targetEntity: string
+ targetEntityId: string
+ createdAt: string
+}
diff --git a/services/web/app/src/Features/Authentication/AuthenticationController.js b/services/web/app/src/Features/Authentication/AuthenticationController.js
index 0df10e0715..a190ddab5a 100644
--- a/services/web/app/src/Features/Authentication/AuthenticationController.js
+++ b/services/web/app/src/Features/Authentication/AuthenticationController.js
@@ -36,7 +36,22 @@ function send401WithChallenge(res) {
function checkCredentials(userDetailsMap, user, password) {
const expectedPassword = userDetailsMap.get(user)
const userExists = userDetailsMap.has(user) && expectedPassword // user exists with a non-null password
- const isValid = userExists && tsscmp(expectedPassword, password)
+
+ let isValid = false
+ if (userExists) {
+ if (Array.isArray(expectedPassword)) {
+ const isValidPrimary = Boolean(
+ expectedPassword[0] && tsscmp(expectedPassword[0], password)
+ )
+ const isValidFallback = Boolean(
+ expectedPassword[1] && tsscmp(expectedPassword[1], password)
+ )
+ isValid = isValidPrimary || isValidFallback
+ } else {
+ isValid = tsscmp(expectedPassword, password)
+ }
+ }
+
if (!isValid) {
logger.err({ user }, 'invalid login details')
}
@@ -82,6 +97,7 @@ const AuthenticationController = {
analyticsId: user.analyticsId || user._id,
alphaProgram: user.alphaProgram || undefined, // only store if set
betaProgram: user.betaProgram || undefined, // only store if set
+ externalAuth: user.externalAuth || false,
}
if (user.isAdmin) {
lightUser.isAdmin = true
@@ -357,6 +373,7 @@ const AuthenticationController = {
return AuthenticationController._redirectToLoginOrRegisterPage(req, res)
} else {
req.user = SessionManager.getSessionUser(req.session)
+ req.logger?.addFields({ userId: req.user._id })
return next()
}
}
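The expanded checkCredentials above accepts either a single expected password or a [primary, fallback] pair per user, so basic-auth secrets can be rotated without downtime. A standalone restatement of that logic (user name and secrets are placeholders), using the same timing-safe tsscmp comparison:

    const tsscmp = require('tsscmp')

    function checkCredentials(userDetailsMap, user, password) {
      const expectedPassword = userDetailsMap.get(user)
      if (!userDetailsMap.has(user) || !expectedPassword) return false
      if (Array.isArray(expectedPassword)) {
        // accept either the primary or the fallback secret during rotation
        return (
          Boolean(expectedPassword[0] && tsscmp(expectedPassword[0], password)) ||
          Boolean(expectedPassword[1] && tsscmp(expectedPassword[1], password))
        )
      }
      return tsscmp(expectedPassword, password)
    }

    const users = new Map([['monitoring', ['new-secret', 'old-secret']]])
    console.log(checkCredentials(users, 'monitoring', 'old-secret')) // true
    console.log(checkCredentials(users, 'monitoring', 'wrong'))      // false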
diff --git a/services/web/app/src/Features/Authentication/AuthenticationManager.js b/services/web/app/src/Features/Authentication/AuthenticationManager.js
index 33827f1673..6ac510986c 100644
--- a/services/web/app/src/Features/Authentication/AuthenticationManager.js
+++ b/services/web/app/src/Features/Authentication/AuthenticationManager.js
@@ -409,10 +409,6 @@ const AuthenticationManager = {
if (!_exceedsMaximumLengthRatio(password, MAX_SIMILARITY, emailPart)) {
const similarity = DiffHelper.stringSimilarity(password, emailPart)
if (similarity > MAX_SIMILARITY) {
- logger.warn(
- { email, emailPart, similarity, maxSimilarity: MAX_SIMILARITY },
- 'Password too similar to email'
- )
return new Error('password is too similar to email')
}
}
diff --git a/services/web/app/src/Features/Authorization/AuthorizationManager.js b/services/web/app/src/Features/Authorization/AuthorizationManager.js
index 565788c3ba..22d92ea9d9 100644
--- a/services/web/app/src/Features/Authorization/AuthorizationManager.js
+++ b/services/web/app/src/Features/Authorization/AuthorizationManager.js
@@ -10,6 +10,7 @@ const PublicAccessLevels = require('./PublicAccessLevels')
const Errors = require('../Errors/Errors')
const { hasAdminAccess } = require('../Helpers/AdminAuthorizationHelper')
const Settings = require('@overleaf/settings')
+const DocumentUpdaterHandler = require('../DocumentUpdater/DocumentUpdaterHandler')
function isRestrictedUser(
userId,
@@ -87,9 +88,54 @@ async function getPrivilegeLevelForProject(
opts = {}
) {
if (userId) {
- return getPrivilegeLevelForProjectWithUser(userId, projectId, opts)
+ return await getPrivilegeLevelForProjectWithUser(
+ userId,
+ projectId,
+ null,
+ opts
+ )
} else {
- return getPrivilegeLevelForProjectWithoutUser(projectId, token, opts)
+ return await getPrivilegeLevelForProjectWithoutUser(projectId, token, opts)
+ }
+}
+
+/**
+ * Get the privilege level that the user has for the project.
+ *
+ * @param userId - The id of the user that wants to access the project.
+ * @param projectId - The id of the project to be accessed.
+ * @param {string} token
+ * @param {ProjectAccess} projectAccess
+ * @param {Object} opts
+ * @param {boolean} opts.ignoreSiteAdmin - Do not consider whether the user is
+ * a site admin.
+ * @param {boolean} opts.ignorePublicAccess - Do not consider the project is
+ * publicly accessible.
+ *
+ * @returns {string|boolean} The privilege level. One of "owner",
+ * "readAndWrite", "readOnly" or false.
+ */
+async function getPrivilegeLevelForProjectWithProjectAccess(
+ userId,
+ projectId,
+ token,
+ projectAccess,
+ opts = {}
+) {
+ if (userId) {
+ return await getPrivilegeLevelForProjectWithUser(
+ userId,
+ projectId,
+ projectAccess,
+ opts
+ )
+ } else {
+ return await _getPrivilegeLevelForProjectWithoutUserWithPublicAccessLevel(
+ projectId,
+ token,
+ projectAccess.publicAccessLevel(),
+ opts
+ )
}
}
@@ -97,6 +143,7 @@ async function getPrivilegeLevelForProject(
async function getPrivilegeLevelForProjectWithUser(
userId,
projectId,
+ projectAccess,
opts = {}
) {
if (!opts.ignoreSiteAdmin) {
@@ -105,11 +152,11 @@ async function getPrivilegeLevelForProjectWithUser(
}
}
- const privilegeLevel =
- await CollaboratorsGetter.promises.getMemberIdPrivilegeLevel(
- userId,
- projectId
- )
+ projectAccess =
+ projectAccess ||
+ (await CollaboratorsGetter.promises.getProjectAccess(projectId))
+
+ const privilegeLevel = projectAccess.privilegeLevelForUser(userId)
if (privilegeLevel && privilegeLevel !== PrivilegeLevels.NONE) {
// The user has direct access
return privilegeLevel
@@ -118,7 +165,7 @@ async function getPrivilegeLevelForProjectWithUser(
if (!opts.ignorePublicAccess) {
// Legacy public-access system
// User is present (not anonymous), but does not have direct access
- const publicAccessLevel = await getPublicAccessLevel(projectId)
+ const publicAccessLevel = projectAccess.publicAccessLevel()
if (publicAccessLevel === PublicAccessLevels.READ_ONLY) {
return PrivilegeLevels.READ_ONLY
}
@@ -136,7 +183,21 @@ async function getPrivilegeLevelForProjectWithoutUser(
token,
opts = {}
) {
- const publicAccessLevel = await getPublicAccessLevel(projectId)
+ return await _getPrivilegeLevelForProjectWithoutUserWithPublicAccessLevel(
+ projectId,
+ token,
+ await getPublicAccessLevel(projectId),
+ opts
+ )
+}
+
+// User is Anonymous, Try Token-based access
+async function _getPrivilegeLevelForProjectWithoutUserWithPublicAccessLevel(
+ projectId,
+ token,
+ publicAccessLevel,
+ opts = {}
+) {
if (!opts.ignorePublicAccess) {
if (publicAccessLevel === PublicAccessLevels.READ_ONLY) {
// Legacy public read-only access for anonymous user
@@ -148,7 +209,7 @@ async function getPrivilegeLevelForProjectWithoutUser(
}
}
if (publicAccessLevel === PublicAccessLevels.TOKEN_BASED) {
- return getPrivilegeLevelForProjectWithToken(projectId, token)
+ return await getPrivilegeLevelForProjectWithToken(projectId, token)
}
// Deny anonymous user access
@@ -201,6 +262,19 @@ async function canUserWriteProjectContent(userId, projectId, token) {
)
}
+async function canUserWriteOrReviewProjectContent(userId, projectId, token) {
+ const privilegeLevel = await getPrivilegeLevelForProject(
+ userId,
+ projectId,
+ token
+ )
+ return (
+ privilegeLevel === PrivilegeLevels.OWNER ||
+ privilegeLevel === PrivilegeLevels.READ_AND_WRITE ||
+ privilegeLevel === PrivilegeLevels.REVIEW
+ )
+}
+
async function canUserWriteProjectSettings(userId, projectId, token) {
const privilegeLevel = await getPrivilegeLevelForProject(
userId,
@@ -240,9 +314,45 @@ async function isUserSiteAdmin(userId) {
return hasAdminAccess(user)
}
+async function canUserDeleteOrResolveThread(
+ userId,
+ projectId,
+ docId,
+ threadId,
+ token
+) {
+ const privilegeLevel = await getPrivilegeLevelForProject(
+ userId,
+ projectId,
+ token,
+ { ignorePublicAccess: true }
+ )
+ if (
+ privilegeLevel === PrivilegeLevels.OWNER ||
+ privilegeLevel === PrivilegeLevels.READ_AND_WRITE
+ ) {
+ return true
+ }
+
+ if (privilegeLevel !== PrivilegeLevels.REVIEW) {
+ return false
+ }
+
+ const comment = await DocumentUpdaterHandler.promises.getComment(
+ projectId,
+ docId,
+ threadId
+ )
+ return comment.metadata.user_id === userId
+}
+
module.exports = {
canUserReadProject: callbackify(canUserReadProject),
canUserWriteProjectContent: callbackify(canUserWriteProjectContent),
+ canUserWriteOrReviewProjectContent: callbackify(
+ canUserWriteOrReviewProjectContent
+ ),
+ canUserDeleteOrResolveThread: callbackify(canUserDeleteOrResolveThread),
canUserWriteProjectSettings: callbackify(canUserWriteProjectSettings),
canUserRenameProject: callbackify(canUserRenameProject),
canUserAdminProject: callbackify(canUserAdminProject),
@@ -253,10 +363,13 @@ module.exports = {
promises: {
canUserReadProject,
canUserWriteProjectContent,
+ canUserWriteOrReviewProjectContent,
+ canUserDeleteOrResolveThread,
canUserWriteProjectSettings,
canUserRenameProject,
canUserAdminProject,
getPrivilegeLevelForProject,
+ getPrivilegeLevelForProjectWithProjectAccess,
isRestrictedUserForProject,
isUserSiteAdmin,
},
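A usage sketch for the new thread check (IDs are placeholders): owners and editors may always delete or resolve a comment thread, a user with REVIEW access only when the thread's comment carries their own user_id, and read-only or anonymous users never.

    const AuthorizationManager = require('./AuthorizationManager')

    // e.g. called by the middleware below before a thread is resolved/deleted
    async function canModerateThread(userId, projectId, docId, threadId, token) {
      return await AuthorizationManager.promises.canUserDeleteOrResolveThread(
        userId,
        projectId,
        docId,
        threadId,
        token
      )
    }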
diff --git a/services/web/app/src/Features/Authorization/AuthorizationMiddleware.js b/services/web/app/src/Features/Authorization/AuthorizationMiddleware.js
index 6b2d2ab920..02043442c8 100644
--- a/services/web/app/src/Features/Authorization/AuthorizationMiddleware.js
+++ b/services/web/app/src/Features/Authorization/AuthorizationMiddleware.js
@@ -103,6 +103,35 @@ async function ensureUserCanWriteProjectSettings(req, res, next) {
next()
}
+async function ensureUserCanDeleteOrResolveThread(req, res, next) {
+ const projectId = _getProjectId(req)
+ const docId = _getDocId(req)
+ const threadId = _getThreadId(req)
+ const userId = _getUserId(req)
+ const token = TokenAccessHandler.getRequestToken(req, projectId)
+ const canDeleteThread =
+ await AuthorizationManager.promises.canUserDeleteOrResolveThread(
+ userId,
+ projectId,
+ docId,
+ threadId,
+ token
+ )
+ if (canDeleteThread) {
+ logger.debug(
+ { userId, projectId },
+ 'allowing user to delete or resolve a comment thread'
+ )
+ return next()
+ }
+
+ logger.debug(
+ { userId, projectId, threadId },
+ 'denying user to delete or resolve a comment thread'
+ )
+ return HttpErrorHandler.forbidden(req, res)
+}
+
async function ensureUserCanWriteProjectContent(req, res, next) {
const projectId = _getProjectId(req)
const userId = _getUserId(req)
@@ -127,6 +156,32 @@ async function ensureUserCanWriteProjectContent(req, res, next) {
HttpErrorHandler.forbidden(req, res)
}
+async function ensureUserCanWriteOrReviewProjectContent(req, res, next) {
+ const projectId = _getProjectId(req)
+ const userId = _getUserId(req)
+ const token = TokenAccessHandler.getRequestToken(req, projectId)
+
+ const canWriteOrReviewProjectContent =
+ await AuthorizationManager.promises.canUserWriteOrReviewProjectContent(
+ userId,
+ projectId,
+ token
+ )
+ if (canWriteOrReviewProjectContent) {
+ logger.debug(
+ { userId, projectId },
+ 'allowing user write or review access to project content'
+ )
+ return next()
+ }
+
+ logger.debug(
+ { userId, projectId },
+ 'denying user write or review access to project content'
+ )
+ return HttpErrorHandler.forbidden(req, res)
+}
+
async function ensureUserCanAdminProject(req, res, next) {
const projectId = _getProjectId(req)
const userId = _getUserId(req)
@@ -166,6 +221,28 @@ function _getProjectId(req) {
return projectId
}
+function _getDocId(req) {
+ const docId = req.params.doc_id
+ if (!docId) {
+ throw new Error('Expected doc_id in request parameters')
+ }
+ if (!ObjectId.isValid(docId)) {
+ throw new Errors.NotFoundError(`invalid docId: ${docId}`)
+ }
+ return docId
+}
+
+function _getThreadId(req) {
+ const threadId = req.params.thread_id
+ if (!threadId) {
+ throw new Error('Expected thread_id in request parameters')
+ }
+ if (!ObjectId.isValid(threadId)) {
+ throw new Errors.NotFoundError(`invalid threadId: ${threadId}`)
+ }
+ return threadId
+}
+
function _getUserId(req) {
return (
SessionManager.getLoggedInUserId(req.session) ||
@@ -200,9 +277,15 @@ module.exports = {
ensureUserCanWriteProjectSettings: expressify(
ensureUserCanWriteProjectSettings
),
+ ensureUserCanDeleteOrResolveThread: expressify(
+ ensureUserCanDeleteOrResolveThread
+ ),
ensureUserCanWriteProjectContent: expressify(
ensureUserCanWriteProjectContent
),
+ ensureUserCanWriteOrReviewProjectContent: expressify(
+ ensureUserCanWriteOrReviewProjectContent
+ ),
ensureUserCanAdminProject: expressify(ensureUserCanAdminProject),
ensureUserIsSiteAdmin: expressify(ensureUserIsSiteAdmin),
restricted,
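Hypothetical route wiring for the two new middleware functions; the real paths live in the web router and may differ, but the handlers do expect project_id, doc_id and thread_id route parameters, as _getDocId and _getThreadId above show.

    const express = require('express')
    const AuthorizationMiddleware = require('./AuthorizationMiddleware')

    const router = express.Router()

    // Owners/editors may act on any thread, reviewers only on their own.
    router.delete(
      '/project/:project_id/doc/:doc_id/thread/:thread_id',
      AuthorizationMiddleware.ensureUserCanDeleteOrResolveThread,
      (req, res) => res.sendStatus(204)
    )

    // Writers and reviewers (but not read-only collaborators) may pass.
    router.post(
      '/project/:project_id/doc/:doc_id/changes/accept', // placeholder path
      AuthorizationMiddleware.ensureUserCanWriteOrReviewProjectContent,
      (req, res) => res.sendStatus(204)
    )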
diff --git a/services/web/app/src/Features/Authorization/PermissionsController.js b/services/web/app/src/Features/Authorization/PermissionsController.js
index e8d4aff809..27b99081c6 100644
--- a/services/web/app/src/Features/Authorization/PermissionsController.js
+++ b/services/web/app/src/Features/Authorization/PermissionsController.js
@@ -8,10 +8,18 @@ const {
const { assertUserPermissions } = require('./PermissionsManager').promises
const Modules = require('../../infrastructure/Modules')
const { expressify } = require('@overleaf/promise-utils')
+const Features = require('../../infrastructure/Features')
+
+/**
+ * @typedef {(import('express').Request)} Request
+ * @typedef {(import('express').Response)} Response
+ * @typedef {(import('express').NextFunction)} NextFunction
+ * @typedef {import('./PermissionsManager').Capability} Capability
+ */
/**
* Function that returns middleware to add an `assertPermission` function to the request object to check if the user has a specific capability.
- * @returns {Function} The middleware function that adds the `assertPermission` function to the request object.
+ * @returns {() => (req: Request, res: Response, next: NextFunction) => void} The middleware function that adds the `assertPermission` function to the request object.
*/
function useCapabilities() {
const middleware = async function (req, res, next) {
@@ -29,12 +37,15 @@ function useCapabilities() {
return next()
}
try {
- let results = await Modules.promises.hooks.fire(
+ /**
+ * @type {{groupPolicy: Record<string, boolean>}[][]}
+ */
+ const hookResponses = await Modules.promises.hooks.fire(
'getGroupPolicyForUser',
req.user
)
// merge array of all results from all modules
- results = results.flat()
+ const results = hookResponses.flat()
if (results.length > 0) {
// get the combined group policy applying to the user
@@ -69,8 +80,8 @@ function useCapabilities() {
/**
* Function that returns middleware to check if the user has permission to access a resource.
- * @param {[string]} requiredCapabilities - the capabilities required to access the resource.
- * @returns {Function} The middleware function that checks if the user has the required capabilities.
+ * @param {...Capability} requiredCapabilities - the capabilities required to access the resource.
+ * @returns {(req: Request, res: Response, next: NextFunction) => void} The middleware function that checks if the user has the required capabilities.
*/
function requirePermission(...requiredCapabilities) {
if (
@@ -79,7 +90,15 @@ function requirePermission(...requiredCapabilities) {
) {
throw new Error('invalid required capabilities')
}
+ /**
+ * @param {Request} req
+ * @param {Response} res
+ * @param {NextFunction} next
+ */
const doRequest = async function (req, res, next) {
+ if (!Features.hasFeature('saas')) {
+ return next()
+ }
if (!req.user) {
return next(new Error('no user'))
}
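A usage sketch for the controller's two middleware factories, assuming both are exported from this module (route path and capability name are placeholders): useCapabilities() attaches req.assertPermission based on the user's combined group policy, and requirePermission(...) rejects the request unless every listed capability is allowed. Note the early return added above: outside SaaS builds requirePermission now lets requests straight through, so Server Pro / CE deployments are unaffected by group policies.

    const express = require('express')
    const PermissionsController = require('./PermissionsController')

    const router = express.Router()

    // make capability checks available on every request
    router.use(PermissionsController.useCapabilities())

    // refuse the route unless the user's group policy allows the capability
    router.post(
      '/example-feature', // placeholder path
      PermissionsController.requirePermission('example-capability'), // placeholder name
      (req, res) => res.sendStatus(204)
    )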
diff --git a/services/web/app/src/Features/Authorization/PermissionsManager.js b/services/web/app/src/Features/Authorization/PermissionsManager.js
index 49c19369ce..aad1021b24 100644
--- a/services/web/app/src/Features/Authorization/PermissionsManager.js
+++ b/services/web/app/src/Features/Authorization/PermissionsManager.js
@@ -45,16 +45,23 @@ const { callbackify } = require('util')
const { ForbiddenError } = require('../Errors/Errors')
const Modules = require('../../infrastructure/Modules')
+/**
+ * @typedef {(import('../../../../types/capabilities').Capability)} Capability
+ */
+
+/** @type {Map<string, Map<Capability, boolean>>} */
const POLICY_TO_CAPABILITY_MAP = new Map()
const POLICY_TO_VALIDATOR_MAP = new Map()
+/** @type {Map<Capability, boolean>} */
const DEFAULT_PERMISSIONS = new Map()
+/** @type {Set<string>} */
const ALLOWED_PROPERTIES = new Set()
/**
* Throws an error if the given capability is not registered.
*
* @private
- * @param {string} capability - The name of the capability to check.
+ * @param {Capability} capability - The name of the capability to check.
* @throws {Error} If the capability is not registered.
*/
function ensureCapabilityExists(capability) {
@@ -63,10 +70,29 @@ function ensureCapabilityExists(capability) {
}
}
+/**
+ * Validates a group policy object
+ *
+ * @param {Record<string, boolean>} policies - An object containing policy names and booleans
+ * as key-value entries.
+ * @throws {Error} if the `policies` object contains a policy that is not
+ * registered, or the policy value is not a boolean
+ */
+function validatePolicies(policies) {
+ for (const [policy, value] of Object.entries(policies)) {
+ if (!POLICY_TO_CAPABILITY_MAP.has(policy)) {
+ throw new Error(`unknown policy: ${policy}`)
+ }
+ if (typeof value !== 'boolean') {
+ throw new Error(`policy value must be a boolean: ${policy} = ${value}`)
+ }
+ }
+}
+
/**
* Registers a new capability with the given name and options.
*
- * @param {string} name - The name of the capability to register.
+ * @param {Capability} name - The name of the capability to register.
* @param {Object} options - The options for the capability.
* @param {boolean} options.default - The default value for the capability
* (required).
@@ -89,7 +115,7 @@ function registerCapability(name, options) {
* Registers a new policy with the given name, capabilities, and options.
*
* @param {string} name - The name of the policy to register.
- * @param {Object} capabilities - The capabilities for the policy.
+ * @param {Partial<Record<Capability, boolean>>|Map<Capability, boolean>} capabilities - The capabilities for the policy.
* @param {Object} [options] - The options for the policy.
* @param {Function?} [options.validator] - The optional validator function for the
* policy.
@@ -103,10 +129,11 @@ function registerPolicy(name, capabilities, options = {}) {
if (POLICY_TO_CAPABILITY_MAP.has(name)) {
throw new Error(`policy already registered: ${name}`)
}
+
+ /** @type {[Capability, boolean][]} */
+ const entries = Object.entries(capabilities)
// check that all the entries in the capability set exist and are booleans
- for (const [capabilityName, capabilityValue] of Object.entries(
- capabilities
- )) {
+ for (const [capabilityName, capabilityValue] of entries) {
// check that the capability exists (look in the default permissions)
if (!DEFAULT_PERMISSIONS.has(capabilityName)) {
throw new Error(`unknown capability: ${capabilityName}`)
@@ -139,19 +166,18 @@ function registerAllowedProperty(name) {
/**
* returns the set of allowed properties that have been registered
- *
- * @returns {Set} ALLOWED_PROPERTIES
*/
function getAllowedProperties() {
return ALLOWED_PROPERTIES
}
+
/**
* Returns an array of policy names that are enforced based on the provided
* group policy object.
*
* @private
- * @param {Object} groupPolicy - The group policy object to check.
- * @returns {Array} An array of policy names that are enforced.
+ * @param {Partial<Record<string, boolean>>} groupPolicy - The group policy object to check.
+ * @returns {Capability[]} An array of policy names that are enforced.
*/
function getEnforcedPolicyNames(groupPolicy = {}) {
if (!groupPolicy) {
@@ -173,7 +199,7 @@ function getEnforcedPolicyNames(groupPolicy = {}) {
* @private
* @param {string} policyName - The name of the policy to retrieve the
* capability value from.
- * @param {string} capability - The name of the capability to retrieve the value
+ * @param {Capability} capability - The name of the capability to retrieve the value
* for.
* @returns {boolean | undefined} The value of the capability for the policy, or
* undefined if the policy or capability is not found.
@@ -186,7 +212,7 @@ function getCapabilityValueFromPolicy(policyName, capability) {
* Returns the default value for the specified capability.
*
* @private
- * @param {string} capability - The name of the capability to retrieve the
+ * @param {Capability} capability - The name of the capability to retrieve the
* default value for.
* @returns {boolean | undefined} The default value for the capability, or
* undefined if the capability is not found.
@@ -203,9 +229,10 @@ function getValidatorFromPolicy(policyName) {
* Returns a set of default capabilities based on the DEFAULT_PERMISSIONS map.
*
* @private
- * @returns {Set} A set of default capabilities.
+ * @returns {Set<Capability>} A set of default capabilities.
*/
function getDefaultCapabilities() {
+ /** @type {Set<Capability>} */
const defaultCapabilities = new Set()
for (const [
capabilityName,
@@ -223,7 +250,7 @@ function getDefaultCapabilities() {
* which are not allowed by the policy.
*
* @private
- * @param {Set} capabilitySet - The set of capabilities to apply the policy to.
+ * @param {Set<Capability>} capabilitySet - The set of capabilities to apply the policy to.
* @param {string} policyName - The name of the policy to apply.
* @throws {Error} If the policy is unknown.
*/
@@ -262,10 +289,11 @@ function getUserCapabilities(groupPolicy) {
/**
* Combines an array of group policies into a single policy object.
*
- * @param {Array} groupPolicies - An array of group policies.
- * @returns {Object} - The combined group policy object.
+ * @param {Record<string, boolean>[]} groupPolicies - An array of group policies.
+ * @returns {Record