You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							152 lines
						
					
					
						
							3.3 KiB
						
					
					
				
			
		
		
	
	
							152 lines
						
					
					
						
							3.3 KiB
						
					
					
				| 
 | |
| var fs = require('fs');
 | |
| 
 | |
| var utf8  = require('./encoding/utf8'),
 | |
|   unicode = require('./encoding/unicode'),
 | |
|   mbcs    = require('./encoding/mbcs'),
 | |
|   sbcs    = require('./encoding/sbcs'),
 | |
|   iso2022 = require('./encoding/iso2022');
 | |
| 
 | |
| var self = this;
 | |
| 
 | |
| var recognisers = [
 | |
|   new utf8,
 | |
|   new unicode.UTF_16BE,
 | |
|   new unicode.UTF_16LE,
 | |
|   new unicode.UTF_32BE,
 | |
|   new unicode.UTF_32LE,
 | |
|   new mbcs.sjis,
 | |
|   new mbcs.big5,
 | |
|   new mbcs.euc_jp,
 | |
|   new mbcs.euc_kr,
 | |
|   new mbcs.gb_18030,
 | |
|   new iso2022.ISO_2022_JP,
 | |
|   new iso2022.ISO_2022_KR,
 | |
|   new iso2022.ISO_2022_CN,
 | |
|   new sbcs.ISO_8859_1,
 | |
|   new sbcs.ISO_8859_2,
 | |
|   new sbcs.ISO_8859_5,
 | |
|   new sbcs.ISO_8859_6,
 | |
|   new sbcs.ISO_8859_7,
 | |
|   new sbcs.ISO_8859_8,
 | |
|   new sbcs.ISO_8859_9,
 | |
|   new sbcs.windows_1251,
 | |
|   new sbcs.windows_1256,
 | |
|   new sbcs.KOI8_R
 | |
| ];
 | |
| 
 | |
| module.exports.detect = function(buffer, opts) {
 | |
| 
 | |
|   // Tally up the byte occurence statistics.
 | |
|   var fByteStats = [];
 | |
|   for (var i = 0; i < 256; i++)
 | |
|     fByteStats[i] = 0;
 | |
| 
 | |
|   for (var i = buffer.length - 1; i >= 0; i--)
 | |
|     fByteStats[buffer[i] & 0x00ff]++;
 | |
| 
 | |
|   var fC1Bytes = false;
 | |
|   for (var i = 0x80; i <= 0x9F; i += 1) {
 | |
|     if (fByteStats[i] != 0) {
 | |
|       fC1Bytes = true;
 | |
|       break;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   var context = {
 | |
|     fByteStats:  fByteStats,
 | |
|     fC1Bytes:    fC1Bytes,
 | |
|     fRawInput:   buffer,
 | |
|     fRawLength:  buffer.length,
 | |
|     fInputBytes: buffer,
 | |
|     fInputLen:   buffer.length
 | |
|   };
 | |
| 
 | |
|   var matches = recognisers.map(function(rec) {
 | |
|     return rec.match(context);
 | |
|   }).filter(function(match) {
 | |
|     return !!match;
 | |
|   }).sort(function(a, b) {
 | |
|     return b.confidence - a.confidence;
 | |
|   });
 | |
| 
 | |
|   if (opts && opts.returnAllMatches === true) {
 | |
|     return matches;
 | |
|   }
 | |
|   else {
 | |
|     return matches.length > 0 ? matches[0].name : null;
 | |
|   }
 | |
| };
 | |
| 
 | |
| module.exports.detectFile = function(filepath, opts, cb) {
 | |
|   if (typeof opts === 'function') {
 | |
|     cb = opts;
 | |
|     opts = undefined;
 | |
|   }
 | |
| 
 | |
|   var fd;
 | |
| 
 | |
|   var handler = function(err, buffer) {
 | |
|     if (fd) {
 | |
|       fs.closeSync(fd);
 | |
|     }
 | |
| 
 | |
|     if (err) return cb(err, null);
 | |
|     cb(null, self.detect(buffer, opts));
 | |
|   };
 | |
| 
 | |
|   if (opts && opts.sampleSize) {
 | |
|     fd = fs.openSync(filepath, 'r'),
 | |
|       sample = Buffer.allocUnsafe(opts.sampleSize);
 | |
| 
 | |
|     fs.read(fd, sample, 0, opts.sampleSize, null, function(err) {
 | |
|       handler(err, sample);
 | |
|     });
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   fs.readFile(filepath, handler);
 | |
| };
 | |
| 
 | |
| module.exports.detectFileSync = function(filepath, opts) {
 | |
|   if (opts && opts.sampleSize) {
 | |
|     var fd = fs.openSync(filepath, 'r'),
 | |
|       sample = Buffer.allocUnsafe(opts.sampleSize);
 | |
| 
 | |
|     fs.readSync(fd, sample, 0, opts.sampleSize);
 | |
|     fs.closeSync(fd);
 | |
|     return self.detect(sample, opts);
 | |
|   }
 | |
| 
 | |
|   return self.detect(fs.readFileSync(filepath), opts);
 | |
| };
 | |
| 
 | |
| // Wrappers for the previous functions to return all encodings
 | |
| module.exports.detectAll = function(buffer, opts) {
 | |
|   if (typeof opts !== 'object') {
 | |
|     opts = {};
 | |
|   }
 | |
|   opts.returnAllMatches = true;
 | |
|   return self.detect(buffer, opts);
 | |
| }
 | |
| 
 | |
| module.exports.detectFileAll = function(filepath, opts, cb) {
 | |
|   if (typeof opts === 'function') {
 | |
|     cb = opts;
 | |
|     opts = undefined;
 | |
|   }
 | |
|   if (typeof opts !== 'object') {
 | |
|     opts = {};
 | |
|   }
 | |
|   opts.returnAllMatches = true;
 | |
|   self.detectFile(filepath, opts, cb);
 | |
| }
 | |
| 
 | |
| module.exports.detectFileAllSync = function(filepath, opts) {
 | |
|   if (typeof opts !== 'object') {
 | |
|     opts = {};
 | |
|   }
 | |
|   opts.returnAllMatches = true;
 | |
|   return self.detectFileSync(filepath, opts);
 | |
| }
 |