<?php
declare(strict_types=1);
/**
 * Check start-of-file (SOF) and end-of-file (EOF) markers for a media file.
 *
 * Detection is based on magic bytes in the first 64 bytes of the file (plus a
 * 4 KiB probe for ISO-BMFF). The EOF check is format specific:
 *
 *  - jpeg     : EOI marker (FF D9) somewhere in the last 4 KiB
 *  - png      : file ends with a zero-length IEND chunk (12 bytes incl. CRC)
 *  - gif      : last byte is the trailer 0x3B
 *  - webp/avi : RIFF size field + 8 equals the file size
 *  - bmp      : size declared in the header equals the file size
 *  - svg      : a closing </svg> tag appears in the last 8 KiB
 *  - mp4      : top-level ISO-BMFF boxes tile the file exactly
 *  - mkv/webm : never verified (eof=false, note explains why)
 *  - flv      : WEAK heuristic, only checks that data follows the header
 *  - mpeg-ts? : WEAK heuristic, sync byte 0x47 on every probed 188-byte boundary
 *
 * When the file cannot be opened or is empty, 'format' is '' and 'note'
 * carries the reason; otherwise an unrecognised file has format 'unknown'.
 *
 * @param string $path Path of the file to inspect.
 *
 * @return array{path:string,format:string,sof:bool,eof:bool,ok:bool,note:?string}
 *         'ok' is true only when both 'sof' and 'eof' are true; 'note' holds
 *         extra information on failure or ambiguity, null otherwise.
 */
function check_sof_eof(string $path): array
{
    $res = [
        'path' => $path,
        'format' => '',
        'sof' => false,
        'eof' => false,
        'ok' => false,
        'note' => null,
    ];

    // Drop any cached stat so the size matches what is on disk right now.
    clearstatcache(true, $path);

    // Directories pass is_readable() but cannot be read as a byte stream.
    if (!is_file($path) || !is_readable($path)) {
        $res['note'] = 'file not readable';
        return $res;
    }
    $size = filesize($path);
    if ($size === false || $size === 0) {
        $res['note'] = 'empty or unreadable file';
        return $res;
    }
    $fh = fopen($path, 'rb');
    if ($fh === false) {
        $res['note'] = 'failed to open file';
        return $res;
    }
    $res['format'] = 'unknown';

    try {
        // Read $length bytes at $offset; always yields a string ('' on any failure)
        // so the strictly typed helpers below never receive false.
        $readAt = static function (int $offset, int $length, int $whence = SEEK_SET) use ($fh): string {
            if ($length <= 0 || fseek($fh, $offset, $whence) !== 0) {
                return '';
            }
            $data = fread($fh, $length);
            return $data === false ? '' : $data;
        };
        // Read up to $length bytes from the end of the file.
        $readTail = static function (int $length) use ($readAt, $size): string {
            $n = min($length, $size);
            return $readAt(-$n, $n, SEEK_END);
        };
        $startsWith = static function (string $data, string $prefix): bool {
            return substr($data, 0, strlen($prefix)) === $prefix;
        };

        $head = $readAt(0, 64);
        if ($head === '') {
            $res['note'] = 'failed to read file header';
            return $res;
        }
        $isRiff = $startsWith($head, 'RIFF');
        $riffForm = substr($head, 8, 4);

        if ($startsWith($head, "\xFF\xD8")) {
            // JPEG: SOI at file start; trailing metadata after EOI is common,
            // so look for EOI anywhere in the last 4 KiB.
            $res['format'] = 'jpeg';
            $res['sof'] = true;
            if (strpos($readTail(4096), "\xFF\xD9") !== false) {
                $res['eof'] = true;
            } else {
                $res['note'] = 'JPEG missing EOI (0xFFD9) in trailing bytes';
            }
        } elseif ($startsWith($head, "\x89PNG\x0D\x0A\x1A\x0A")) {
            // PNG: must end with IEND = 4-byte zero length + 'IEND' + 4-byte CRC.
            $res['format'] = 'png';
            $res['sof'] = true;
            if ($size < 12) {
                $res['note'] = 'file too small to contain IEND';
            } elseif (substr($readTail(12), 0, 8) === "\x00\x00\x00\x00IEND") {
                $res['eof'] = true;
            } else {
                $res['note'] = 'PNG missing IEND chunk at end';
            }
        } elseif ($startsWith($head, 'GIF87a') || $startsWith($head, 'GIF89a')) {
            // GIF: trailer is the single byte 0x3B.
            $res['format'] = 'gif';
            $res['sof'] = true;
            if ($readTail(1) === "\x3B") {
                $res['eof'] = true;
            } else {
                $res['note'] = 'GIF missing trailer byte 0x3B';
            }
        } elseif ($isRiff && $riffForm === 'WEBP') {
            // WebP: RIFF size (little-endian uint32 at offset 4) = file size - 8.
            $res['format'] = 'webp';
            $res['sof'] = true;
            $riffSize = unpack('V', substr($head, 4, 4))[1];
            if (($riffSize + 8) === $size) {
                $res['eof'] = true;
            } else {
                $res['note'] = "RIFF size field ($riffSize + 8) != actual file size ($size)";
            }
        } elseif ($startsWith($head, 'BM')) {
            // BMP: total file size is stored little-endian at offset 2.
            $res['format'] = 'bmp';
            $res['sof'] = true;
            if (strlen($head) >= 6) {
                $declared = unpack('V', substr($head, 2, 4))[1];
                if ($declared === $size) {
                    $res['eof'] = true;
                } else {
                    $res['note'] = "BMP header declared size $declared != actual $size";
                }
            } else {
                $res['note'] = 'BMP header too short to read size';
            }
        } elseif (stripos($head, '<svg') !== false) {
            // SVG: text format; require a closing tag in the last 8 KiB.
            $res['format'] = 'svg';
            $res['sof'] = true;
            if (stripos($readTail(8192), '</svg>') !== false) {
                $res['eof'] = true;
            } else {
                $res['note'] = 'SVG missing closing </svg> tag in trailing content';
            }
        } elseif ($isRiff && $riffForm === 'AVI ') {
            // AVI: same RIFF size rule as WebP.
            $res['format'] = 'avi';
            $res['sof'] = true;
            $riffSize = unpack('V', substr($head, 4, 4))[1];
            if (($riffSize + 8) === $size) {
                $res['eof'] = true;
            } else {
                $res['note'] = "AVI RIFF size mismatch ($riffSize + 8) != $size";
            }
        } elseif ($startsWith($head, "\x1A\x45\xDF\xA3")) {
            // Matroska / WebM (EBML). Checked BEFORE the loose 'ftyp' probe so
            // that stray 'ftyp' bytes in the payload cannot misclassify the file.
            $res['format'] = 'mkv/webm';
            $res['sof'] = true;
            $res['note'] = 'Matroska/WebM does not have a single fixed EOF marker; full parser required to validate';
        } elseif ($startsWith($head, 'FLV')) {
            // FLV: no EOF magic; only verify that data follows the header
            // (DataOffset is a big-endian uint32 at offset 5). Weak heuristic.
            $res['format'] = 'flv';
            $res['sof'] = true;
            if (strlen($head) >= 9) {
                $dataOffset = unpack('N', substr($head, 5, 4))[1];
                if ($size > $dataOffset) {
                    $res['eof'] = true;
                } else {
                    $res['note'] = 'FLV file length <= DataOffset';
                }
            } else {
                $res['note'] = 'FLV header too short to read DataOffset';
            }
        } elseif (strpos($readAt(0, min(4096, $size)), 'ftyp') !== false) {
            // ISO-BMFF family (MP4/MOV/...): walk the top-level boxes and make
            // sure they tile the file exactly.
            $res['format'] = 'mp4';
            $res['sof'] = true;
            $pos = 0;
            $ok = true;
            while ($pos < $size) {
                if ($size - $pos < 8) {
                    $ok = false;
                    $res['note'] = "Truncated/top-level box at pos $pos (less than 8 bytes remain)";
                    break;
                }
                $hdr = $readAt($pos, 8);
                if (strlen($hdr) < 8) {
                    $ok = false;
                    $res['note'] = "Failed reading header at $pos";
                    break;
                }
                $boxSize = unpack('N', substr($hdr, 0, 4))[1];
                $boxType = substr($hdr, 4, 4);
                $headerLen = 8;
                if ($boxSize === 0) {
                    // Size 0 means "box extends to end of file".
                    $boxSize = $size - $pos;
                } elseif ($boxSize === 1) {
                    // Size 1 means a 64-bit big-endian "largesize" follows the type.
                    $ext = $readAt($pos + 8, 8);
                    if (strlen($ext) < 8) {
                        $ok = false;
                        $res['note'] = "Truncated 64-bit size at $pos";
                        break;
                    }
                    $hi = unpack('N', substr($ext, 0, 4))[1];
                    $lo = unpack('N', substr($ext, 4, 4))[1];
                    // May overflow into float for absurd sizes; such a value is
                    // larger than any real file and is rejected just below.
                    $boxSize = ($hi * 4294967296) + $lo;
                    $headerLen = 16;
                }
                if ($boxSize < $headerLen || ($pos + $boxSize) > $size) {
                    $ok = false;
                    // Box type is raw file data: keep the note printable / valid UTF-8.
                    $typeLabel = (string) preg_replace('/[^\x20-\x7E]/', '?', $boxType);
                    $res['note'] = "Invalid box size for '$typeLabel' at $pos (boxSize=$boxSize, headerLen=$headerLen, fileSize=$size)";
                    break;
                }
                $pos += $boxSize;
            }
            if ($ok && $pos === $size) {
                $res['eof'] = true;
            } elseif ($res['note'] === null) {
                $res['note'] = 'Top-level box parsing did not consume entire file';
            }
        } elseif ($head[0] === "\x47") {
            // Possible MPEG-TS: 188-byte packets, each starting with sync byte 0x47.
            // Require the sync byte on EVERY packet boundary inside the first
            // 10000 bytes (and at least two boundaries); offset 0 alone proves
            // nothing because it is what brought us into this branch.
            $res['format'] = 'mpeg-ts?';
            $res['sof'] = true;
            $chunk = $readAt(0, min(10000, $size));
            $chunkLen = strlen($chunk);
            $checked = 0;
            $hits = 0;
            for ($i = 0; $i < $chunkLen; $i += 188) {
                $checked++;
                if ($chunk[$i] === "\x47") {
                    $hits++;
                }
            }
            if ($checked >= 2 && $hits === $checked) {
                $res['eof'] = true; // weak heuristic
            } else {
                $res['note'] = 'Transport stream sync bytes not detected consistently';
            }
        } else {
            $res['note'] = 'Unknown format or unsupported for marker verification';
        }
    } finally {
        // Runs on every exit path, including the early return above.
        fclose($fh);
    }

    $res['ok'] = ($res['sof'] === true && $res['eof'] === true);
    return $res;
}
// Example usage: `php <this-script> /path/to/file` on the CLI; under a web
// SAPI the default placeholder path is used and the output is wrapped in <pre>.
$path = $argv[1] ?? '/path/to/file';
$isCli = (php_sapi_name() === 'cli');
$result = check_sof_eof($path);

// The path and the note may contain raw bytes that are not valid UTF-8;
// substitute them instead of letting json_encode() fail and print nothing.
$json = json_encode($result, JSON_PRETTY_PRINT | JSON_INVALID_UTF8_SUBSTITUTE);
if ($json === false) {
    $json = '{"error": ' . json_encode(json_last_error_msg()) . '}';
}

if ($isCli) {
    echo $json, PHP_EOL;
} else {
    // Escape for the HTML context so '<', '>' and '&' in the data render literally.
    echo '<pre>', htmlspecialchars($json, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'), PHP_EOL, '</pre>';
}