overleaf-cep/services/clsi/app/js/OutputFileArchiveManager.js
Jakob Ackermann b538d56591 [clsi-cache] backend (#24388)
* [clsi-cache] initial revision of the clsi-cache service

* [clsi] send output files to clsi-cache and import from clsi-cache

* [web] pass editorId to clsi

* [web] clear clsi-cache when clearing clsi cache

* [web] add split-tests for controlling clsi-cache rollout

* [web] populate clsi-cache when cloning/creating project from template

* [clsi-cache] produce less noise when populating cache hits 404

* [clsi-cache] push docker image to AR

* [clsi-cache] push docker image to AR

* [clsi-cache] allow compileGroup in job payload

* [clsi-cache] set X-Zone header from latest endpoint

* [clsi-cache] use method POST for /enqueue endpoint

* [web] populate clsi-cache in zone b with template data

* [clsi-cache] limit number of editors per project/user folder to 10

* [web] clone: populate the clsi-cache unless the TeXLive release changed

* [clsi-cache] keep user folder when clearing cache as anonymous user

* [clsi] download old output.tar.gz when synctex finds empty compile dir

* [web] fix lint

* [clsi-cache] multi-zonal lookup of single build output

* [clsi-cache] add more validation and limits

Co-authored-by: Brian Gough <brian.gough@overleaf.com>

* [clsi] do not include clsi-cache tar-ball in output.zip

* [clsi-cache] fix reference after remaining constant

Co-authored-by: Alf Eaton <alf.eaton@overleaf.com>

* [web] consolidate validation of filename into ClsiCacheHandler

* [clsi-cache] extend metrics and event tracking

- break down most of the clsi metrics by label
  - compile=initial - new compile dir without previous output files
  - compile=recompile - recompile in existing compile dir
  - compile=from-cache - compile using previous clsi-cache
- extend segmentation on compile-result-backend event
  - isInitialCompile=true - found new compile dir at start of request
  - restoredClsiCache=true - restored compile dir from clsi-cache

* [clsi] rename metrics labels for download of clsi-cache

This is in preparation for synctex changes.

* [clsi] use constant for limit of entries in output.tar.gz

Co-authored-by: Eric Mc Sween <eric.mcsween@overleaf.com>

* [clsi-cache] fix cloning of project cache

---------

Co-authored-by: Brian Gough <brian.gough@overleaf.com>
Co-authored-by: Alf Eaton <alf.eaton@overleaf.com>
Co-authored-by: Eric Mc Sween <eric.mcsween@overleaf.com>
GitOrigin-RevId: 4901a65497af13be1549af7f38ceee3188fcf881
2025-04-10 08:05:17 +00:00

113 lines
3 KiB
JavaScript

const archiver = require('archiver')
const OutputCacheManager = require('./OutputCacheManager')
const OutputFileFinder = require('./OutputFileFinder')
const Settings = require('@overleaf/settings')
const { open } = require('node:fs/promises')
const { NotFoundError } = require('./Errors')
const logger = require('@overleaf/logger')
// NOTE: Updating this list requires a corresponding change in
// * services/web/frontend/js/features/pdf-preview/util/file-list.ts
const ignoreFiles = ['output.fls', 'output.fdb_latexmk']
function getContentDir(projectId, userId) {
let subDir
if (userId != null) {
subDir = `${projectId}-${userId}`
} else {
subDir = projectId
}
return `${Settings.path.outputDir}/${subDir}/`
}
module.exports = {
async archiveFilesForBuild(projectId, userId, build) {
logger.debug({ projectId, userId, build }, 'Will create zip file')
const contentDir = getContentDir(projectId, userId)
const outputFiles = await this._getAllOutputFiles(
contentDir,
projectId,
userId,
build
)
const archive = archiver('zip')
archive.on('error', err => {
logger.warn(
{ err, projectId, userId, build },
'error emitted when creating output files archive'
)
})
archive.on('warning', err => {
logger.warn(
{ err, projectId, userId, build },
'warning emitted when creating output files archive'
)
})
const missingFiles = []
for (const { path } of outputFiles) {
let fileHandle
try {
fileHandle = await open(
`${contentDir}${OutputCacheManager.path(build, path)}`
)
} catch (error) {
logger.warn(
{ path, error, projectId, userId, build },
'error opening file to add to output files archive'
)
missingFiles.push(path)
continue
}
const fileStream = fileHandle.createReadStream()
archive.append(fileStream, { name: path })
}
if (missingFiles.length > 0) {
archive.append(missingFiles.join('\n'), {
name: 'missing_files.txt',
})
}
archive.finalize().catch(error => {
logger.error(
{ error, projectId, userId, build },
'error finalizing output files archive'
)
})
return archive
},
async _getAllOutputFiles(contentDir, projectId, userId, build) {
try {
const { outputFiles } = await OutputFileFinder.promises.findOutputFiles(
[],
`${contentDir}${OutputCacheManager.path(build, '.')}`
)
return outputFiles.filter(
// Ignore the pdf, clsi-cache tar-ball and also ignore the files ignored by the frontend.
({ path }) =>
path !== 'output.pdf' &&
path !== 'output.tar.gz' &&
!ignoreFiles.includes(path)
)
} catch (error) {
if (
error.code === 'ENOENT' ||
error.code === 'ENOTDIR' ||
error.code === 'EACCES'
) {
throw new NotFoundError('Output files not found')
}
throw error
}
},
}