You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

553 lines
16 KiB

2 months ago
'use strict'
// this[BUFFER] is the remainder of a chunk if we're waiting for
// the full 512 bytes of a header to come in. We will Buffer.concat()
// it to the next write(), which is a mem copy, but a small one.
//
// this[QUEUE] is a Yallist of entries that haven't been emitted
// yet this can only get filled up if the user keeps write()ing after
// a write() returns false, or does a write() with more than one entry
//
// We don't buffer chunks, we always parse them and either create an
// entry, or push it into the active entry. The ReadEntry class knows
// to throw data away if .ignore=true
//
// Shift entry off the buffer when it emits 'end', and emit 'entry' for
// the next one in the list.
//
// At any time, we're pushing body chunks into the entry at WRITEENTRY,
// and waiting for 'end' on the entry at READENTRY
//
// ignored entries get .resume() called on them straight away
const warner = require('./warn-mixin.js')
const Header = require('./header.js')
const EE = require('events')
const Yallist = require('yallist')
const maxMetaEntrySize = 1024 * 1024
const Entry = require('./read-entry.js')
const Pax = require('./pax.js')
const zlib = require('minizlib')
const { nextTick } = require('process')
const gzipHeader = Buffer.from([0x1f, 0x8b])
const STATE = Symbol('state')
const WRITEENTRY = Symbol('writeEntry')
const READENTRY = Symbol('readEntry')
const NEXTENTRY = Symbol('nextEntry')
const PROCESSENTRY = Symbol('processEntry')
const EX = Symbol('extendedHeader')
const GEX = Symbol('globalExtendedHeader')
const META = Symbol('meta')
const EMITMETA = Symbol('emitMeta')
const BUFFER = Symbol('buffer')
const QUEUE = Symbol('queue')
const ENDED = Symbol('ended')
const EMITTEDEND = Symbol('emittedEnd')
const EMIT = Symbol('emit')
const UNZIP = Symbol('unzip')
const CONSUMECHUNK = Symbol('consumeChunk')
const CONSUMECHUNKSUB = Symbol('consumeChunkSub')
const CONSUMEBODY = Symbol('consumeBody')
const CONSUMEMETA = Symbol('consumeMeta')
const CONSUMEHEADER = Symbol('consumeHeader')
const CONSUMING = Symbol('consuming')
const BUFFERCONCAT = Symbol('bufferConcat')
const MAYBEEND = Symbol('maybeEnd')
const WRITING = Symbol('writing')
const ABORTED = Symbol('aborted')
const DONE = Symbol('onDone')
const SAW_VALID_ENTRY = Symbol('sawValidEntry')
const SAW_NULL_BLOCK = Symbol('sawNullBlock')
const SAW_EOF = Symbol('sawEOF')
const CLOSESTREAM = Symbol('closeStream')
const noop = _ => true
module.exports = warner(class Parser extends EE {
constructor (opt) {
opt = opt || {}
super(opt)
this.file = opt.file || ''
// set to boolean false when an entry starts. 1024 bytes of \0
// is technically a valid tarball, albeit a boring one.
this[SAW_VALID_ENTRY] = null
// these BADARCHIVE errors can't be detected early. listen on DONE.
this.on(DONE, _ => {
if (this[STATE] === 'begin' || this[SAW_VALID_ENTRY] === false) {
// either less than 1 block of data, or all entries were invalid.
// Either way, probably not even a tarball.
this.warn('TAR_BAD_ARCHIVE', 'Unrecognized archive format')
}
})
if (opt.ondone) {
this.on(DONE, opt.ondone)
} else {
this.on(DONE, _ => {
this.emit('prefinish')
this.emit('finish')
this.emit('end')
})
}
this.strict = !!opt.strict
this.maxMetaEntrySize = opt.maxMetaEntrySize || maxMetaEntrySize
this.filter = typeof opt.filter === 'function' ? opt.filter : noop
// Unlike gzip, brotli doesn't have any magic bytes to identify it
// Users need to explicitly tell us they're extracting a brotli file
// Or we infer from the file extension
const isTBR = (opt.file && (
opt.file.endsWith('.tar.br') || opt.file.endsWith('.tbr')))
// if it's a tbr file it MIGHT be brotli, but we don't know until
// we look at it and verify it's not a valid tar file.
this.brotli = !opt.gzip && opt.brotli !== undefined ? opt.brotli
: isTBR ? undefined
: false
// have to set this so that streams are ok piping into it
this.writable = true
this.readable = false
this[QUEUE] = new Yallist()
this[BUFFER] = null
this[READENTRY] = null
this[WRITEENTRY] = null
this[STATE] = 'begin'
this[META] = ''
this[EX] = null
this[GEX] = null
this[ENDED] = false
this[UNZIP] = null
this[ABORTED] = false
this[SAW_NULL_BLOCK] = false
this[SAW_EOF] = false
this.on('end', () => this[CLOSESTREAM]())
if (typeof opt.onwarn === 'function') {
this.on('warn', opt.onwarn)
}
if (typeof opt.onentry === 'function') {
this.on('entry', opt.onentry)
}
}
[CONSUMEHEADER] (chunk, position) {
if (this[SAW_VALID_ENTRY] === null) {
this[SAW_VALID_ENTRY] = false
}
let header
try {
header = new Header(chunk, position, this[EX], this[GEX])
} catch (er) {
return this.warn('TAR_ENTRY_INVALID', er)
}
if (header.nullBlock) {
if (this[SAW_NULL_BLOCK]) {
this[SAW_EOF] = true
// ending an archive with no entries. pointless, but legal.
if (this[STATE] === 'begin') {
this[STATE] = 'header'
}
this[EMIT]('eof')
} else {
this[SAW_NULL_BLOCK] = true
this[EMIT]('nullBlock')
}
} else {
this[SAW_NULL_BLOCK] = false
if (!header.cksumValid) {
this.warn('TAR_ENTRY_INVALID', 'checksum failure', { header })
} else if (!header.path) {
this.warn('TAR_ENTRY_INVALID', 'path is required', { header })
} else {
const type = header.type
if (/^(Symbolic)?Link$/.test(type) && !header.linkpath) {
this.warn('TAR_ENTRY_INVALID', 'linkpath required', { header })
} else if (!/^(Symbolic)?Link$/.test(type) && header.linkpath) {
this.warn('TAR_ENTRY_INVALID', 'linkpath forbidden', { header })
} else {
const entry = this[WRITEENTRY] = new Entry(header, this[EX], this[GEX])
// we do this for meta & ignored entries as well, because they
// are still valid tar, or else we wouldn't know to ignore them
if (!this[SAW_VALID_ENTRY]) {
if (entry.remain) {
// this might be the one!
const onend = () => {
if (!entry.invalid) {
this[SAW_VALID_ENTRY] = true
}
}
entry.on('end', onend)
} else {
this[SAW_VALID_ENTRY] = true
}
}
if (entry.meta) {
if (entry.size > this.maxMetaEntrySize) {
entry.ignore = true
this[EMIT]('ignoredEntry', entry)
this[STATE] = 'ignore'
entry.resume()
} else if (entry.size > 0) {
this[META] = ''
entry.on('data', c => this[META] += c)
this[STATE] = 'meta'
}
} else {
this[EX] = null
entry.ignore = entry.ignore || !this.filter(entry.path, entry)
if (entry.ignore) {
// probably valid, just not something we care about
this[EMIT]('ignoredEntry', entry)
this[STATE] = entry.remain ? 'ignore' : 'header'
entry.resume()
} else {
if (entry.remain) {
this[STATE] = 'body'
} else {
this[STATE] = 'header'
entry.end()
}
if (!this[READENTRY]) {
this[QUEUE].push(entry)
this[NEXTENTRY]()
} else {
this[QUEUE].push(entry)
}
}
}
}
}
}
}
[CLOSESTREAM] () {
nextTick(() => this.emit('close'))
}
[PROCESSENTRY] (entry) {
let go = true
if (!entry) {
this[READENTRY] = null
go = false
} else if (Array.isArray(entry)) {
this.emit.apply(this, entry)
} else {
this[READENTRY] = entry
this.emit('entry', entry)
if (!entry.emittedEnd) {
entry.on('end', _ => this[NEXTENTRY]())
go = false
}
}
return go
}
[NEXTENTRY] () {
do {} while (this[PROCESSENTRY](this[QUEUE].shift()))
if (!this[QUEUE].length) {
// At this point, there's nothing in the queue, but we may have an
// entry which is being consumed (readEntry).
// If we don't, then we definitely can handle more data.
// If we do, and either it's flowing, or it has never had any data
// written to it, then it needs more.
// The only other possibility is that it has returned false from a
// write() call, so we wait for the next drain to continue.
const re = this[READENTRY]
const drainNow = !re || re.flowing || re.size === re.remain
if (drainNow) {
if (!this[WRITING]) {
this.emit('drain')
}
} else {
re.once('drain', _ => this.emit('drain'))
}
}
}
[CONSUMEBODY] (chunk, position) {
// write up to but no more than writeEntry.blockRemain
const entry = this[WRITEENTRY]
const br = entry.blockRemain
const c = (br >= chunk.length && position === 0) ? chunk
: chunk.slice(position, position + br)
entry.write(c)
if (!entry.blockRemain) {
this[STATE] = 'header'
this[WRITEENTRY] = null
entry.end()
}
return c.length
}
[CONSUMEMETA] (chunk, position) {
const entry = this[WRITEENTRY]
const ret = this[CONSUMEBODY](chunk, position)
// if we finished, then the entry is reset
if (!this[WRITEENTRY]) {
this[EMITMETA](entry)
}
return ret
}
[EMIT] (ev, data, extra) {
if (!this[QUEUE].length && !this[READENTRY]) {
this.emit(ev, data, extra)
} else {
this[QUEUE].push([ev, data, extra])
}
}
[EMITMETA] (entry) {
this[EMIT]('meta', this[META])
switch (entry.type) {
case 'ExtendedHeader':
case 'OldExtendedHeader':
this[EX] = Pax.parse(this[META], this[EX], false)
break
case 'GlobalExtendedHeader':
this[GEX] = Pax.parse(this[META], this[GEX], true)
break
case 'NextFileHasLongPath':
case 'OldGnuLongPath':
this[EX] = this[EX] || Object.create(null)
this[EX].path = this[META].replace(/\0.*/, '')
break
case 'NextFileHasLongLinkpath':
this[EX] = this[EX] || Object.create(null)
this[EX].linkpath = this[META].replace(/\0.*/, '')
break
/* istanbul ignore next */
default: throw new Error('unknown meta: ' + entry.type)
}
}
abort (error) {
this[ABORTED] = true
this.emit('abort', error)
// always throws, even in non-strict mode
this.warn('TAR_ABORT', error, { recoverable: false })
}
write (chunk) {
if (this[ABORTED]) {
return
}
// first write, might be gzipped
const needSniff = this[UNZIP] === null ||
this.brotli === undefined && this[UNZIP] === false
if (needSniff && chunk) {
if (this[BUFFER]) {
chunk = Buffer.concat([this[BUFFER], chunk])
this[BUFFER] = null
}
if (chunk.length < gzipHeader.length) {
this[BUFFER] = chunk
return true
}
// look for gzip header
for (let i = 0; this[UNZIP] === null && i < gzipHeader.length; i++) {
if (chunk[i] !== gzipHeader[i]) {
this[UNZIP] = false
}
}
const maybeBrotli = this.brotli === undefined
if (this[UNZIP] === false && maybeBrotli) {
// read the first header to see if it's a valid tar file. If so,
// we can safely assume that it's not actually brotli, despite the
// .tbr or .tar.br file extension.
// if we ended before getting a full chunk, yes, def brotli
if (chunk.length < 512) {
if (this[ENDED]) {
this.brotli = true
} else {
this[BUFFER] = chunk
return true
}
} else {
// if it's tar, it's pretty reliably not brotli, chances of
// that happening are astronomical.
try {
new Header(chunk.slice(0, 512))
this.brotli = false
} catch (_) {
this.brotli = true
}
}
}
if (this[UNZIP] === null || (this[UNZIP] === false && this.brotli)) {
const ended = this[ENDED]
this[ENDED] = false
this[UNZIP] = this[UNZIP] === null
? new zlib.Unzip()
: new zlib.BrotliDecompress()
this[UNZIP].on('data', chunk => this[CONSUMECHUNK](chunk))
this[UNZIP].on('error', er => this.abort(er))
this[UNZIP].on('end', _ => {
this[ENDED] = true
this[CONSUMECHUNK]()
})
this[WRITING] = true
const ret = this[UNZIP][ended ? 'end' : 'write'](chunk)
this[WRITING] = false
return ret
}
}
this[WRITING] = true
if (this[UNZIP]) {
this[UNZIP].write(chunk)
} else {
this[CONSUMECHUNK](chunk)
}
this[WRITING] = false
// return false if there's a queue, or if the current entry isn't flowing
const ret =
this[QUEUE].length ? false :
this[READENTRY] ? this[READENTRY].flowing :
true
// if we have no queue, then that means a clogged READENTRY
if (!ret && !this[QUEUE].length) {
this[READENTRY].once('drain', _ => this.emit('drain'))
}
return ret
}
[BUFFERCONCAT] (c) {
if (c && !this[ABORTED]) {
this[BUFFER] = this[BUFFER] ? Buffer.concat([this[BUFFER], c]) : c
}
}
[MAYBEEND] () {
if (this[ENDED] &&
!this[EMITTEDEND] &&
!this[ABORTED] &&
!this[CONSUMING]) {
this[EMITTEDEND] = true
const entry = this[WRITEENTRY]
if (entry && entry.blockRemain) {
// truncated, likely a damaged file
const have = this[BUFFER] ? this[BUFFER].length : 0
this.warn('TAR_BAD_ARCHIVE', `Truncated input (needed ${
entry.blockRemain} more bytes, only ${have} available)`, { entry })
if (this[BUFFER]) {
entry.write(this[BUFFER])
}
entry.end()
}
this[EMIT](DONE)
}
}
[CONSUMECHUNK] (chunk) {
if (this[CONSUMING]) {
this[BUFFERCONCAT](chunk)
} else if (!chunk && !this[BUFFER]) {
this[MAYBEEND]()
} else {
this[CONSUMING] = true
if (this[BUFFER]) {
this[BUFFERCONCAT](chunk)
const c = this[BUFFER]
this[BUFFER] = null
this[CONSUMECHUNKSUB](c)
} else {
this[CONSUMECHUNKSUB](chunk)
}
while (this[BUFFER] &&
this[BUFFER].length >= 512 &&
!this[ABORTED] &&
!this[SAW_EOF]) {
const c = this[BUFFER]
this[BUFFER] = null
this[CONSUMECHUNKSUB](c)
}
this[CONSUMING] = false
}
if (!this[BUFFER] || this[ENDED]) {
this[MAYBEEND]()
}
}
[CONSUMECHUNKSUB] (chunk) {
// we know that we are in CONSUMING mode, so anything written goes into
// the buffer. Advance the position and put any remainder in the buffer.
let position = 0
const length = chunk.length
while (position + 512 <= length && !this[ABORTED] && !this[SAW_EOF]) {
switch (this[STATE]) {
case 'begin':
case 'header':
this[CONSUMEHEADER](chunk, position)
position += 512
break
case 'ignore':
case 'body':
position += this[CONSUMEBODY](chunk, position)
break
case 'meta':
position += this[CONSUMEMETA](chunk, position)
break
/* istanbul ignore next */
default:
throw new Error('invalid state: ' + this[STATE])
}
}
if (position < length) {
if (this[BUFFER]) {
this[BUFFER] = Buffer.concat([chunk.slice(position), this[BUFFER]])
} else {
this[BUFFER] = chunk.slice(position)
}
}
}
end (chunk) {
if (!this[ABORTED]) {
if (this[UNZIP]) {
this[UNZIP].end(chunk)
} else {
this[ENDED] = true
if (this.brotli === undefined) chunk = chunk || Buffer.alloc(0)
this.write(chunk)
}
}
}
})