Mã máy x86 - 585 byte, 468 có thưởng
Thất vọng với mục nhập cuối cùng của tôi lớn như thế nào, tôi quyết định thử một cái gì đó rất khác biệt lần này. Vẽ trêninsertusernamehere
ý tưởng tách tên thành phố khỏi tên trạng thái, do đó tránh logic không cần thiết và các đầu cuối không cần thiết, tôi vẫn nghĩ rằng tôi phải có thể làm cho chương trình nhỏ hơn chuỗi thô. UPX sẽ không giúp tôi gian lận, phàn nàn rằng chương trình này quá nhỏ. Suy nghĩ về việc nén, tôi đã cố nén đầu ra văn bản 662 byte bằng WinRar nhưng vẫn chỉ nhận được 543 byte - và điều đó không có gì để giải nén nó. Nó dường như vẫn còn quá lớn, cho rằng đó chỉ là kết quả, không có bất kỳ mã nào.
Sau đó, tôi nhận ra - Tôi chỉ sử dụng 26 ký tự cho các chữ cái và 2 ký tự khác cho dấu cách và dấu phẩy. Hmm, phù hợp với 32, chỉ cần 5 bit. Vì vậy, tôi đã viết một chương trình javascript nhanh để mã hóa các chuỗi, gán az thành 0-25 và dấu cách và dấu phẩy có 26 và 27. Để giữ mọi thứ đơn giản, mỗi ký tự được mã hóa thành 5 bit, cho dù nó có cần nhiều hay không. Từ đó, tôi chỉ ghép tất cả các bit lại với nhau và chia chúng lại thành các khối có kích thước byte. Điều này cho phép tôi gói 563 byte chuỗi thành 353 byte - tiết kiệm 37,5% hoặc khoảng 210 byte. Tôi đã không hoàn toàn quản lý để nén chương trình và dữ liệu vào cùng một không gian như chỉ là dữ liệu được giải nén, nhưng tôi đã đến đủ gần để hạnh phúc.
Quan điểm của Hxd về nhị phân:
68 3F 00 68 E8 01 68 4F 03 E8 1C 00 68 22 01 68 27 02 68 B3 03 E8 10 00 - h?.hè.hO.è..h".h'.h³.è..
BE 83 05 C6 04 00 68 4F 03 68 B3 03 E8 62 00 C3 55 89 E5 81 C5 04 00 8B - ¾ƒ.Æ..hO.h³.èb.ÃU‰å.Å..‹
76 02 8B 7E 00 B6 05 30 DB AC B2 08 D0 D0 D0 D3 FE CA FE CE 75 1E 80 FB - v.‹~.¶.0Û¬².ÐÐÐÓþÊþÎu.€û
1A 75 05 B3 20 E9 0D 00 80 FB 1B 75 05 B3 2C E9 03 00 80 C3 61 88 1D 47 - .u.³ é..€û.u.³,é..€Ãaˆ.G
B6 05 30 DB 08 D2 75 D4 FF 4E 04 75 CC 5D C2 06 00 53 B3 62 FE CB 88 DF - ¶.0Û.ÒuÔÿN.uÌ]Â..S³bþˈß
80 C7 19 38 D8 72 08 38 F8 77 04 B3 20 28 D8 5B C3 55 89 E5 81 C5 04 00 - €Ç.8Ør.8øw.³ (Ø[ÃU‰å.Å..
8B 76 00 31 C0 88 C2 89 C1 AC A8 FF 74 46 80 FA 20 74 35 08 D2 74 31 3C - ‹v.1ÀˆÂ‰Á¬¨ÿtF€ú t5.Òt1<
2C 75 30 B4 0E CD 10 89 CB 01 DB 03 5E 02 8A 07 E8 B6 FF CD 10 43 8A 07 - ,u0´.Í.‰Ë.Û.^.Š.è¶ÿÍ.CŠ.
E8 AE FF CD 10 B0 0D CD 10 B0 0A CD 10 C6 06 4C 03 00 30 D2 41 E9 C1 FF - è®ÿÍ.°.Í.°.Í.Æ.L..0ÒAéÁÿ
E8 96 FF B4 0E CD 10 88 C2 E9 B5 FF 5D C2 04 00 58 10 D7 1C 0B 64 C4 E4 - è–ÿ´.Í.ˆÂéµÿ]Â..X.×..dÄä
0E 77 60 1B 82 AD AC 9B 5A 96 3A A0 90 DE 06 12 28 19 1A 7A CC 53 54 98 - .w`.‚.¬›Z–: .Þ..(..zÌST˜
D0 29 A4 68 AC 8B 00 19 62 0E 86 49 0B 90 98 3B 62 93 30 1A 35 61 D1 04 - Ð)¤h¬‹..b.†I..˜;b“0.5aÑ.
50 01 01 CA B5 5B 50 08 26 E6 EA 2E A1 89 B4 34 68 03 40 F7 2D 12 D8 9C - P..ʵ[P.&æê.¡‰´4h.@÷-.Øœ
BA 30 34 96 D8 E6 CC CE 61 23 8D 9C 8B 23 41 B1 91 B5 24 76 17 22 44 D8 - º04–ØæÌÎa#.œ‹#A±‘µ$v."DØ
29 29 A1 BB 0B A5 37 37 60 58 40 DC 6E 60 5A C0 70 4A 44 26 E4 06 CC 1A - ))¡».¥77`X@Ün`ZÀpJD&ä.Ì.
29 36 D0 48 F5 42 D6 4D CE 24 6C DC DD A4 85 29 23 27 37 71 40 8E C7 34 - )6ÐHõBÖMÎ$lÜݤ…)#'7q@ŽÇ4
7B 7A 09 18 93 67 04 62 89 06 91 36 C1 43 52 53 06 DF 17 55 03 23 44 4D - {z..“g.b‰.‘6ÁCRS.ß.U.#DM
8D D5 24 76 27 34 4E 88 F6 C7 36 6F 22 D0 48 EC E0 8C CA E8 8F 73 73 C8 - .Õ$v'4NˆöÇ6o"ÐHìàŒÊè.ssÈ
A0 6E 40 43 67 A7 82 8B DA 68 D2 02 9B 5A 1A 27 2D BB 88 16 44 18 FB 60 - n@Cg§‚‹ÚhÒ.›Z.'-»ˆ.D.û`
06 89 39 BB 72 F0 C7 A0 1B 79 DC 46 A2 FB 58 1B 24 34 DB 3B 9A E5 D1 74 - .‰9»rðÇ .yÜF¢ûX.$4Û;šåÑt
DA 40 25 49 CD DC 9F 14 34 C5 41 16 3D 89 CB A3 02 80 6C 0D 68 1E E5 A2 - Ú@%IÍÜŸ.4ÅA.=‰Ë£.€l.h.å¢
5B 11 C9 82 35 A4 DC 80 B9 E9 60 51 34 24 4F 1B 04 D6 06 CC 1B 0A 24 C0 - [.É‚5¤Ü€¹é`Q4$O..Ö.Ì..$À
44 4A D9 62 06 A8 AE 8C F7 20 2C 8C DA D1 39 AC 9A 8B 84 AD 8C 92 D3 1C - DJÙb.¨®Œ÷ ,ŒÚÑ9¬š‹„.Œ’Ó.
86 92 5B 90 05 10 30 8D 9B B6 E5 2C 07 73 01 A1 22 78 D8 8E 08 AC 92 9B - †’[...0.›¶å,.s.¡"xØŽ.¬’›
9B B1 02 32 73 74 24 4F 1B - ›±.2st$O.
Mã nguồn:
[section .text]
[bits 16]
[org 0x100]
entry_point:
push word 63 ; no of bytes of packed data = (5/8) * unpacked_length - rounded up tp nearest byte
push word states_packed
push word states_unpacked
call unpack_bytes
push word 290 ; no bytes of packed data
push word capitals_packed
push word capitals_unpacked
call unpack_bytes
; ensure there's a terminating null after the capitals
mov si, nullTerminator
mov [si], byte 0
;void outputStrings(char *cities, char *states)
push word states_unpacked
push word capitals_unpacked
call output_strings
; int 0x20
ret
;void unpack_states(char *unpackedDest, char *packedInput, int packed_length)
;unpack_capitals:
unpack_bytes:
push bp
mov bp, sp
add bp, 4
mov si, [bp + 2] ; point to the packed input
mov di, [bp + 0] ; point to the output buffer
mov dh, 5 ; number of bits remaining until we have a full output byte, ready to be translated from [0..25] --> [A..Z] (+65) or 26-->' ' or 27-->','
xor bl, bl ; clear our output accumalator
.unpack_get_byte:
lodsb
mov dl, 8 ; number of bits remaining in this packed byte before we need another one
.unpack_get_next_bit:
rcl al, 1 ; put most significant bit into carry flag
rcl bl, 1 ; and put it into the least significant bit of our accumalator
dec dl ; 1 bit less before we need another packed byte
dec dh ; 1 bit less until this output byte is done
jnz .checkInputBitsRemaining
.transform_output_byte:
cmp bl, 26 ; space is encoded as 26
jne .notSpace
mov bl, ' '
jmp .store_output_byte
.notSpace:
cmp bl, 27 ; comma is encoded as 27
jne .notComma
mov bl, ','
jmp .store_output_byte
.notComma:
.alphaChar:
add bl, 'a' ; change from [0..25] to [A..Z]
.store_output_byte:
mov [di], bl ; store it
inc di ; point to the next output element
mov dh, 5 ; and reset the count of bits till we get here again
xor bl, bl
.checkInputBitsRemaining:
or dl,dl ; see if we've emptied the packed byte yet
jnz .unpack_get_next_bit
dec word [bp + 4] ; decrement the number of bytes of input remaining to be processed
jnz .unpack_get_byte ; if we still have some, go back for more
.unpack_input_processed:
pop bp
ret 6
; input:
; al = char
; outpt:
; if al if an alpha char, ensures it is in range [capital-a .. capital-z]
toupper:
push bx
mov bl, 98
dec bl ; bl = 'a'
mov bh, bl
add bh, 25 ; bh = 'z'
cmp al, bl ;'a'
jb .toupperdone
cmp al, bh
ja .toupperdone
mov bl, 32
sub al, bl ;'A' - 'a'
.toupperdone:
pop bx
ret
;void outputStrings(char *cities, char *states)
output_strings:
push bp
mov bp, sp
add bp, 4
mov si, [bp + 0] ; si --> array of cities
xor ax, ax
; mov [lastChar], al ; last printed char is undefined at this point - we'll use this to know if we're processing the first entry
mov dl, al
; mov [string_index], ax ; zero the string_index too
mov cx, ax ; zero the string_index too
.getOutputChar:
lodsb
test al, 0xff
jz .outputDone ; if we've got a NULL, it's the string terminator so exit
; cmp byte [lastChar], ' ' ; if the last char was a space, we have to capitalize this one
cmp dl, ' ' ; if the last char was a space, we have to capitalize this one
je .make_ucase
; cmp byte [lastChar], 0
or dl, dl ; if this is 0, then it's the first char we've printed, therefore we know it should be capitalized
jz .make_ucase
cmp al, ',' ; if this is a comma, the city is done, so print the comma then do the state and a crlf, finally, increment the string_index
jne .printChar
mov ah, 0xe ; code for print-char, teletype output
int 0x10 ; print the char held in al
; mov bx, [string_index]
mov bx, cx;[string_index]
add bx,bx ; x2 since each state is 2 bytes long
add bx, [bp+2] ; bx --> states_unpacked[string_index]
mov al, [bx] ; get the first char of the state
call toupper ; upper case it
; mov ah, 0xe ;not needed, still set from above
int 0x10 ; and print it
inc bx
mov al, [bx] ; get the 2nd char of the state
call toupper ; uppercase it
; mov ah, 0xe ;not needed, still set from above
int 0x10 ; and print it
mov al, 0x0d ; print a CRLF
int 0x10
mov al, 0x0a
int 0x10
mov byte [lastChar], 0 ; zero this, so that the first letter of the new city will be capitalized, just like the first char in the string was
xor dl, dl ; zero this, so that the first letter of the new city will be capitalized, just like the first char in the string was
; inc word [string_index] ; increment our index, ready for the next city's state
inc cx ;word [string_index] ; increment our index, ready for the next city's state
jmp .getOutputChar ; go back and get the next char of the next city
.make_ucase:
call toupper
.printChar:
mov ah, 0xe
int 0x10
; mov [lastChar], al
mov dl, al
jmp .getOutputChar ; go back and get the next char of the next city
.outputDone:
pop bp
ret 4 ; return and clean-up the two vars from the stack
[section .data]
; 63 packed bytes, 100 unpacked (saved 37)
states_packed:
db 01011000b, 00010000b, 11010111b, 00011100b, 00001011b, 01100100b, 11000100b, 11100100b
db 00001110b, 01110111b, 01100000b, 00011011b, 10000010b, 10101101b, 10101100b, 10011011b
db 01011010b, 10010110b, 00111010b, 10100000b, 10010000b, 11011110b, 00000110b, 00010010b
db 00101000b, 00011001b, 00011010b, 01111010b, 11001100b, 01010011b, 01010100b, 10011000b
db 11010000b, 00101001b, 10100100b, 01101000b, 10101100b, 10001011b, 00000000b, 00011001b
db 01100010b, 00001110b, 10000110b, 01001001b, 00001011b, 10010000b, 10011000b, 00111011b
db 01100010b, 10010011b, 00110000b, 00011010b, 00110101b, 01100001b, 11010001b, 00000100b
db 01010000b, 00000001b, 00000001b, 11001010b, 10110101b, 01011011b, 01010000b
; 290 packed bytes, 463 unpacked (saved 173)
capitals_packed:
db 00001000b, 00100110b, 11100110b, 11101010b, 00101110b, 10100001b, 10001001b, 10110100b, 00110100b, 01101000b, 00000011b, 01000000b, 11110111b, 00101101b
db 00010010b, 11011000b, 10011100b, 10111010b, 00110000b, 00110100b, 10010110b, 11011000b, 11100110b, 11001100b, 11001110b, 01100001b, 00100011b, 10001101b
db 10011100b, 10001011b, 00100011b, 01000001b, 10110001b, 10010001b, 10110101b, 00100100b, 01110110b, 00010111b, 00100010b, 01000100b, 11011000b, 00101001b
db 00101001b, 10100001b, 10111011b, 00001011b, 10100101b, 00110111b, 00110111b, 01100000b, 01011000b, 01000000b, 11011100b, 01101110b, 01100000b, 01011010b
db 11000000b, 01110000b, 01001010b, 01000100b, 00100110b, 11100100b, 00000110b, 11001100b, 00011010b, 00101001b, 00110110b, 11010000b, 01001000b, 11110101b
db 01000010b, 11010110b, 01001101b, 11001110b, 00100100b, 01101100b, 11011100b, 11011101b, 10100100b, 10000101b, 00101001b, 00100011b, 00100111b, 00110111b
db 01110001b, 01000000b, 10001110b, 11000111b, 00110100b, 01111011b, 01111010b, 00001001b, 00011000b, 10010011b, 01100111b, 00000100b, 01100010b, 10001001b
db 00000110b, 10010001b, 00110110b, 11000001b, 01000011b, 01010010b, 01010011b, 00000110b, 11011111b, 00010111b, 01010101b, 00000011b, 00100011b, 01000100b
db 01001101b, 10001101b, 11010101b, 00100100b, 01110110b, 00100111b, 00110100b, 01001110b, 10001000b, 11110110b, 11000111b, 00110110b, 01101111b, 00100010b
db 11010000b, 01001000b, 11101100b, 11100000b, 10001100b, 11001010b, 11101000b, 10001111b, 01110011b, 01110011b, 11001000b, 10100000b, 01101110b, 01000000b
db 01000011b, 01100111b, 10100111b, 10000010b, 10001011b, 11011010b, 01101000b, 11010010b, 00000010b, 10011011b, 01011010b, 00011010b, 00100111b, 00101101b
db 10111011b, 10001000b, 00010110b, 01000100b, 00011000b, 11111011b, 01100000b, 00000110b, 10001001b, 00111001b, 10111011b, 01110010b, 11110000b, 11000111b
db 10100000b, 00011011b, 01111001b, 11011100b, 01000110b, 10100010b, 11111011b, 01011000b, 00011011b, 00100100b, 00110100b, 11011011b, 00111011b, 10011010b
db 11100101b, 11010001b, 01110100b, 11011010b, 01000000b, 00100101b, 01001001b, 11001101b, 11011100b, 10011111b, 00010100b, 00110100b, 11000101b, 01000001b
db 00010110b, 00111101b, 10001001b, 11001011b, 10100011b, 00000010b, 10000000b, 01101100b, 00001101b, 01101000b, 00011110b, 11100101b, 10100010b, 01011011b
db 00010001b, 11001001b, 10000010b, 00110101b, 10100100b, 11011100b, 10000000b, 10111001b, 11101001b, 01100000b, 01010001b, 00110100b, 00100100b, 01001111b
db 00011011b, 00000100b, 11010110b, 00000110b, 11001100b, 00011011b, 00001010b, 00100100b, 11000000b, 01000100b, 01001010b, 11011001b, 01100010b, 00000110b
db 10101000b, 10101110b, 10001100b, 11110111b, 00100000b, 00101100b, 10001100b, 11011010b, 11010001b, 00111001b, 10101100b, 10011010b, 10001011b, 10000100b
db 10101101b, 10001100b, 10010010b, 11010011b, 00011100b, 10000110b, 10010010b, 01011011b, 10010000b, 00000101b, 00010000b, 00110000b, 10001101b, 10011011b
db 10110110b, 11100101b, 00101100b, 00000111b, 01110011b, 00000001b, 10100001b, 00100010b, 01111000b, 11011000b, 10001110b, 00001000b, 10101100b, 10010010b
db 10011011b, 10011011b, 10110001b, 00000010b, 00110010b, 01110011b, 01110100b, 00100100b, 01001111b, 00011011b
[section .bss]
lastChar resb 1 ; last printed char - used to capitalize chars after a space (i.e the 2nd or third word of a city name)
string_index resw 1 ; used to index into the array of states, which are each two bytes
states_unpacked resb 100 ; 50 states, 2 bytes each
capitals_unpacked resb 464
nullTerminator resb 1