DecodedBitStreamParser.php 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359
  1. <?php
  2. /*
  3. * Copyright 2007 ZXing authors
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. namespace Zxing\Qrcode\Decoder;
  18. use Zxing\DecodeHintType;
  19. use Zxing\FormatException;
  20. use Zxing\Common\BitSource;
  21. use Zxing\Common\CharacterSetECI;
  22. use Zxing\Common\DecoderResult;
  23. use Zxing\Common\StringUtils;
  24. /**
  25. * <p>QR Codes can encode text as bits in one of several modes, and can use multiple modes
  26. * in one QR Code. This class decodes the bits back into text.</p>
  27. *
  28. * <p>See ISO 18004:2006, 6.4.3 - 6.4.7</p>
  29. *
  30. * @author Sean Owen
  31. */
  32. final class DecodedBitStreamParser {
  33. /**
  34. * See ISO 18004:2006, 6.4.4 Table 5
  35. */
  36. private static $ALPHANUMERIC_CHARS = array(
  37. '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B',
  38. 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
  39. 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
  40. ' ', '$', '%', '*', '+', '-', '.', '/', ':'
  41. );
  42. private static $GB2312_SUBSET = 1;
  43. private function DecodedBitStreamParser() {
  44. }
  45. static function decode($bytes,
  46. $version,
  47. $ecLevel,
  48. $hints) {
  49. $bits = new BitSource($bytes);
  50. $result = '';//new StringBuilder(50);
  51. $byteSegments = array();
  52. $symbolSequence = -1;
  53. $parityData = -1;
  54. try {
  55. $currentCharacterSetECI = null;
  56. $fc1InEffect = false;
  57. $mode='';
  58. do {
  59. // While still another segment to read...
  60. if ($bits->available() < 4) {
  61. // OK, assume we're done. Really, a TERMINATOR mode should have been recorded here
  62. $mode = Mode::$TERMINATOR;
  63. } else {
  64. $mode = Mode::forBits($bits->readBits(4)); // mode is encoded by 4 bits
  65. }
  66. if ($mode != Mode::$TERMINATOR) {
  67. if ($mode == Mode::$FNC1_FIRST_POSITION || $mode == Mode::$FNC1_SECOND_POSITION) {
  68. // We do little with FNC1 except alter the parsed result a bit according to the spec
  69. $fc1InEffect = true;
  70. } else if ($mode == Mode::$STRUCTURED_APPEND) {
  71. if ($bits->available() < 16) {
  72. throw FormatException::getFormatInstance();
  73. }
  74. // sequence number and parity is added later to the result metadata
  75. // Read next 8 bits (symbol sequence #) and 8 bits (parity data), then continue
  76. $symbolSequence = $bits->readBits(8);
  77. $parityData = $bits->readBits(8);
  78. } else if ($mode == Mode::$ECI) {
  79. // Count doesn't apply to ECI
  80. $value = self::parseECIValue($bits);
  81. $currentCharacterSetECI = CharacterSetECI::getCharacterSetECIByValue($value);
  82. if ($currentCharacterSetECI == null) {
  83. throw FormatException::getFormatInstance();
  84. }
  85. } else {
  86. // First handle Hanzi mode which does not start with character count
  87. if ($mode == Mode::$HANZI) {
  88. //chinese mode contains a sub set indicator right after mode indicator
  89. $subset = $bits->readBits(4);
  90. $countHanzi = $bits->readBits($mode->getCharacterCountBits($version));
  91. if ($subset == self::$GB2312_SUBSET) {
  92. self::decodeHanziSegment($bits, $result, $countHanzi);
  93. }
  94. } else {
  95. // "Normal" QR code modes:
  96. // How many characters will follow, encoded in this mode?
  97. $count = $bits->readBits($mode->getCharacterCountBits($version));
  98. if ($mode == Mode::$NUMERIC) {
  99. self::decodeNumericSegment($bits, $result, $count);
  100. } else if ($mode == Mode::$ALPHANUMERIC) {
  101. self::decodeAlphanumericSegment($bits, $result, $count, $fc1InEffect);
  102. } else if ($mode == Mode::$BYTE) {
  103. self::decodeByteSegment($bits, $result, $count, $currentCharacterSetECI, $byteSegments, $hints);
  104. } else if ($mode == Mode::$KANJI) {
  105. self::decodeKanjiSegment($bits, $result, $count);
  106. } else {
  107. throw FormatException::getFormatInstance();
  108. }
  109. }
  110. }
  111. }
  112. } while ($mode != Mode::$TERMINATOR);
  113. } catch (IllegalArgumentException $iae) {
  114. // from readBits() calls
  115. throw FormatException::getFormatInstance();
  116. }
  117. return new DecoderResult($bytes,
  118. $result,
  119. empty($byteSegments) ? null : $byteSegments,
  120. $ecLevel == null ? null : 'L',//ErrorCorrectionLevel::toString($ecLevel),
  121. $symbolSequence,
  122. $parityData);
  123. }
  124. /**
  125. * See specification GBT 18284-2000
  126. */
  127. private static function decodeHanziSegment($bits,
  128. &$result,
  129. $count) {
  130. // Don't crash trying to read more bits than we have available.
  131. if ($count * 13 > $bits->available()) {
  132. throw FormatException::getFormatInstance();
  133. }
  134. // Each character will require 2 bytes. Read the characters as 2-byte pairs
  135. // and decode as GB2312 afterwards
  136. $buffer = fill_array(0,2 * $count,0);
  137. $offset = 0;
  138. while ($count > 0) {
  139. // Each 13 bits encodes a 2-byte character
  140. $twoBytes = $bits->readBits(13);
  141. $assembledTwoBytes = (($twoBytes / 0x060) << 8) | ($twoBytes % 0x060);
  142. if ($assembledTwoBytes < 0x003BF) {
  143. // In the 0xA1A1 to 0xAAFE range
  144. $assembledTwoBytes += 0x0A1A1;
  145. } else {
  146. // In the 0xB0A1 to 0xFAFE range
  147. $assembledTwoBytes += 0x0A6A1;
  148. }
  149. $buffer[$offset] = (($assembledTwoBytes >> 8) & 0xFF);//(byte)
  150. $buffer[$offset + 1] = ($assembledTwoBytes & 0xFF);//(byte)
  151. $offset += 2;
  152. $count--;
  153. }
  154. try {
  155. $result .= iconv('GB2312', 'UTF-8', implode($buffer));
  156. } catch (UnsupportedEncodingException $ignored) {
  157. throw FormatException::getFormatInstance();
  158. }
  159. }
  160. private static function decodeKanjiSegment($bits,
  161. &$result,
  162. $count) {
  163. // Don't crash trying to read more bits than we have available.
  164. if ($count * 13 > $bits->available()) {
  165. throw FormatException::getFormatInstance();
  166. }
  167. // Each character will require 2 bytes. Read the characters as 2-byte pairs
  168. // and decode as Shift_JIS afterwards
  169. $buffer = array(0,2 * $count,0);
  170. $offset = 0;
  171. while ($count > 0) {
  172. // Each 13 bits encodes a 2-byte character
  173. $twoBytes = $bits->readBits(13);
  174. $assembledTwoBytes = (($twoBytes / 0x0C0) << 8) | ($twoBytes % 0x0C0);
  175. if ($assembledTwoBytes < 0x01F00) {
  176. // In the 0x8140 to 0x9FFC range
  177. $assembledTwoBytes += 0x08140;
  178. } else {
  179. // In the 0xE040 to 0xEBBF range
  180. $assembledTwoBytes += 0x0C140;
  181. }
  182. $buffer[$offset] = ($assembledTwoBytes >> 8);//(byte)
  183. $buffer[$offset + 1] = $assembledTwoBytes; //(byte)
  184. $offset += 2;
  185. $count--;
  186. }
  187. // Shift_JIS may not be supported in some environments:
  188. try {
  189. $result .= iconv('shift-jis','utf-8',implode($buffer));
  190. } catch (UnsupportedEncodingException $ignored) {
  191. throw FormatException::getFormatInstance();
  192. }
  193. }
  194. private static function decodeByteSegment($bits,
  195. &$result,
  196. $count,
  197. $currentCharacterSetECI,
  198. &$byteSegments,
  199. $hints) {
  200. // Don't crash trying to read more bits than we have available.
  201. if (8 * $count > $bits->available()) {
  202. throw FormatException::getFormatInstance();
  203. }
  204. $readBytes = fill_array(0,$count,0);
  205. for ($i = 0; $i < $count; $i++) {
  206. $readBytes[$i] = $bits->readBits(8);//(byte)
  207. }
  208. $text = implode(array_map('chr',$readBytes));
  209. $encoding = '';
  210. if ($currentCharacterSetECI == null) {
  211. // The spec isn't clear on this mode; see
  212. // section 6.4.5: t does not say which encoding to assuming
  213. // upon decoding. I have seen ISO-8859-1 used as well as
  214. // Shift_JIS -- without anything like an ECI designator to
  215. // give a hint.
  216. $encoding = mb_detect_encoding($text, $hints);
  217. } else {
  218. $encoding = $currentCharacterSetECI->name();
  219. }
  220. try {
  221. // $result.= mb_convert_encoding($text ,$encoding);//(new String(readBytes, encoding));
  222. $result.= $text;//(new String(readBytes, encoding));
  223. } catch (UnsupportedEncodingException $ignored) {
  224. throw FormatException::getFormatInstance();
  225. }
  226. $byteSegments = array_merge($byteSegments, $readBytes);
  227. }
  228. private static function toAlphaNumericChar($value) {
  229. if ($value >= count(self::$ALPHANUMERIC_CHARS)) {
  230. throw FormatException::getFormatInstance();
  231. }
  232. return self::$ALPHANUMERIC_CHARS[$value];
  233. }
  234. private static function decodeAlphanumericSegment($bits,
  235. &$result,
  236. $count,
  237. $fc1InEffect) {
  238. // Read two characters at a time
  239. $start = strlen($result);
  240. while ($count > 1) {
  241. if ($bits->available() < 11) {
  242. throw FormatException::getFormatInstance();
  243. }
  244. $nextTwoCharsBits = $bits->readBits(11);
  245. $result.=(self::toAlphaNumericChar($nextTwoCharsBits / 45));
  246. $result.=(self::toAlphaNumericChar($nextTwoCharsBits % 45));
  247. $count -= 2;
  248. }
  249. if ($count == 1) {
  250. // special case: one character left
  251. if ($bits->available() < 6) {
  252. throw FormatException::getFormatInstance();
  253. }
  254. $result.=self::toAlphaNumericChar($bits->readBits(6));
  255. }
  256. // See section 6.4.8.1, 6.4.8.2
  257. if ($fc1InEffect) {
  258. // We need to massage the result a bit if in an FNC1 mode:
  259. for ($i = $start; $i < strlen($result); $i++) {
  260. if ($result{$i} == '%') {
  261. if ($i < strlen($result) - 1 && $result{$i + 1} == '%') {
  262. // %% is rendered as %
  263. $result = substr_replace($result,'',$i + 1,1);//deleteCharAt(i + 1);
  264. } else {
  265. // In alpha mode, % should be converted to FNC1 separator 0x1D
  266. $result.setCharAt($i, chr(0x1D));
  267. }
  268. }
  269. }
  270. }
  271. }
  272. private static function decodeNumericSegment($bits,
  273. &$result,
  274. $count) {
  275. // Read three digits at a time
  276. while ($count >= 3) {
  277. // Each 10 bits encodes three digits
  278. if ($bits->available() < 10) {
  279. throw FormatException::getFormatInstance();
  280. }
  281. $threeDigitsBits = $bits->readBits(10);
  282. if ($threeDigitsBits >= 1000) {
  283. throw FormatException::getFormatInstance();
  284. }
  285. $result.=(self::toAlphaNumericChar($threeDigitsBits / 100));
  286. $result.=(self::toAlphaNumericChar(($threeDigitsBits / 10) % 10));
  287. $result.=(self::toAlphaNumericChar($threeDigitsBits % 10));
  288. $count -= 3;
  289. }
  290. if ($count == 2) {
  291. // Two digits left over to read, encoded in 7 bits
  292. if ($bits->available() < 7) {
  293. throw FormatException::getFormatInstance();
  294. }
  295. $twoDigitsBits = $bits->readBits(7);
  296. if ($twoDigitsBits >= 100) {
  297. throw FormatException::getFormatInstance();
  298. }
  299. $result.=(self::toAlphaNumericChar($twoDigitsBits / 10));
  300. $result.=(self::toAlphaNumericChar($twoDigitsBits % 10));
  301. } else if ($count == 1) {
  302. // One digit left over to read
  303. if ($bits->available() < 4) {
  304. throw FormatException::getFormatInstance();
  305. }
  306. $digitBits = $bits->readBits(4);
  307. if ($digitBits >= 10) {
  308. throw FormatException::getFormatInstance();
  309. }
  310. $result.=(self::toAlphaNumericChar($digitBits));
  311. }
  312. }
  313. private static function parseECIValue($bits) {
  314. $firstByte = $bits->readBits(8);
  315. if (($firstByte & 0x80) == 0) {
  316. // just one byte
  317. return $firstByte & 0x7F;
  318. }
  319. if (($firstByte & 0xC0) == 0x80) {
  320. // two bytes
  321. $secondByte = $bits->readBits(8);
  322. return (($firstByte & 0x3F) << 8) | $secondByte;
  323. }
  324. if (($firstByte & 0xE0) == 0xC0) {
  325. // three bytes
  326. $secondThirdBytes = $bits->readBits(16);
  327. return (($firstByte & 0x1F) << 16) | $secondThirdBytes;
  328. }
  329. throw FormatException::getFormatInstance();
  330. }
  331. }