aboutsummaryrefslogtreecommitdiff
blob: 6a63d36861aa305f55c91756ced6b90efd499893 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE guide SYSTEM "/dtd/guide.dtd">

<guide link="/proj/en/hardened/pic-fix-guide.xml" lang="en">
<title>HOWTO Locate and Fix .text Relocations (TEXTRELs)</title>

<author title="Author">
 <mail link="vapier@gentoo.org">Mike Frysinger</mail>
</author>
<author title="Author">
 <mail link="solar@gentoo.org">solar</mail>
</author>
<author title="Contributor">
 <mail link="pageexec@freemail.hu">The PaX team</mail>
</author>
<author title="Contributor">
 <mail link="kevquinn@gentoo.org">Kevin F. Quinn</mail>
</author>

<abstract>A guide for tracking down and fixing .text relocations (TEXTRELs)</abstract>

<!-- The content of this document is placed into the public domain, have fun -->

<version>1.2</version>
<date>2007-08-19</date>

<chapter>
<title>Introduction</title>
<section>
<body>

<p>
You should make sure to read the <uri link="pic-guide.xml">Introduction to 
Position Independent Code</uri> before tackling this guide.
</p>

<p>
This guide is x86-centric for now.  The reason being, the majority of broken 
object files are due to poorly written x86 assembly stemming from the simple 
fact that the x86 architecture has so few registers.  Other architectures have 
a large enough register set that they can reserve a register as the "PIC 
register" without incurring a performance hit.  Every architecture has to be 
mindful of PIC and its implications; x86 just happens to be the dominant 
architecture at the moment in the 'desktop' world of open source.
</p>

<p>
We will update this guide for non-x86 as we acquire details and useful examples.
</p>

</body>
</section>
</chapter>

<chapter>
<title>Finding broken object code</title>
<section>
<body>

<p>
Before you can start fixing something, you have to make sure it's broken first, 
right?  For this reason, we've developed a suite of tools named <uri 
link="/proj/en/hardened/pax-utils.xml">PaX Utilities</uri>.  If you are not 
familiar with these utilities, you should read the <uri 
link="/proj/en/hardened/pax-utils.xml">PaX Utilities Guide</uri> now.  Gentoo 
users can simply do <c>emerge pax-utils</c>.  Non-Gentoo users should be able 
to find a copy of the source tarball in the <path>distfiles</path> on a <uri 
link="/main/en/mirrors.xml">Gentoo Mirror</uri>.  Once you have the PaX 
Utilities set up on your system, we can start playing around with 
<c>scanelf</c>.
</p>

<p>
Keep in mind that although these utilities are named PaX Utilities, they 
certainly do not require PaX or anything else like that on your system.  
The name is a historical artifact and, for want of a better name, has stuck.
</p>

<p>
Let's see if your system has any broken files.
</p>

<pre caption="Scan your system">
$ <i>scanelf -lpqt</i>
TEXTREL  /usr/lib/opengl/xorg-x11/lib/libGL.so.1.2
TEXTREL  /usr/lib/libSDL-1.2.so.0.7.2
TEXTREL  /usr/lib/libdv.so.4.0.2
TEXTREL  /usr/lib/libsmpeg-0.4.so.0.1.3
TEXTREL  /usr/lib/libOSMesa.so.4.0
TEXTREL  /usr/lib/libxvidcore.so.4.1
</pre>

<p>
Ideally, scanelf should not display anything, but on an x86 system, this is 
rarely the case.  Here we can see six libraries with TEXTRELs in them.  
To quickly find out what package these files come from, Gentoo users can 
<c>emerge portage-utils</c> and use <c>qfile</c>.
</p>

<pre caption="Determine the broken packages">
$ <i>qfile `scanelf -qylpF%F#t`</i>
media-libs/libdv (/usr/lib/libdv.so.4.0.2)
media-libs/libsdl (/usr/lib/libSDL-1.2.so.0.7.2)
media-libs/smpeg (/usr/lib/libsmpeg-0.4.so.0.1.3)
media-libs/xvid (/usr/lib/libxvidcore.so.4.1)
x11-base/xorg-x11 (/usr/lib/opengl/xorg-x11/lib/libGL.so.1.2)
x11-base/xorg-x11 (/usr/lib/libOSMesa.so.4.0)
</pre>

<p>
Now that we know the offenders, we have a choice.  We can file a bug upstream 
(who generally don't care unless you can provide a fix), file a bug in the 
<uri link="http://bugs.gentoo.org/">Gentoo Bugzilla</uri> (which is a nice 
lazy cop out), or we can fix it ourselves (that is why you're reading this 
guide right?).  You should double check that the package version you have 
installed is the latest upstream has to offer and the latest version your 
distro has to offer.  Who knows, maybe you can get lucky and someone else has 
already fixed it.  If you wish to get feedback on your work, feel free to 
contact the <mail link="hardened@gentoo.org">Gentoo hardened team</mail>.
</p>

</body>
</section>

<section>
<title>"False" Positives</title>
<body>

<p>
Sometimes you may come across a package which contains a mountain of TEXTRELs 
with seemingly no relation to assembler.  This may simply be because the 
objects were not properly compiled with the appropriate PIC flag.  The fix is 
quite simple: make sure every object file that is linked into the final shared 
object is compiled with the appropriate PIC flag (typically -fPIC).
</p>

<p>
For example, let's look at the silc-plugin package.  It builds up a few 
modules, but only compiles some of the objects with -fPIC that are linked into
the final libsilc_core.so module.  The output of scanelf here is quite 
extensive!
</p>

<pre caption="Run scanelf on silc-plugin">
$ <i>scanelf -qT /usr/lib/irssi/modules/libsilc_core.so | wc -l</i>
10734
$ <i>scanelf -qT /usr/lib/irssi/modules/libsilc_core.so</i>
...
  libsilc_core.so: silc_client_ftp_ask_name [0xD542C] in silc_client_receive_new_id [0xD5380]
  libsilc_core.so: silc_client_run_one [0xD55CA] in silc_client_receive_new_id [0xD5380]
  libsilc_core.so: silc_id_payload_parse [0xD5842] in silc_client_packet_parse_type [0xD57B0]
  libsilc_core.so: fgetc@@GLIBC_2.0 [0xD5857] in silc_client_packet_parse_type [0xD57B0]
...
</pre>

<p>
A TEXTREL on glibc's fgetc() function!?  Either people are calling fgetc() from
assembler (and should be shot), or something else is going on.  A good rule of
thumb is that if it seems like just about every function/variable reference 
causes a TEXTREL and it is all done in C code, then the file was not built as
PIC.  Just review the build output and see if the command to compile it was
invoked with -fPIC.  If not, go fix the build system as you do not need to dig
into the source.  Dodged the bullet this time!
</p>

</body>
</section>
</chapter>

<chapter>
<title>Dissecting broken object code</title>
<section>
<body>

<p>
So we have identified some broken libraries, and we want to fix them.  The 
trouble is, shared library code can be huge.  They can have thousands of 
functions which come from thousands of object files and thousands of source 
code files which total megabytes in size (source code and compiled objects).  
Where the hell do we start!?  Once again, Mighty Mouse^W^W<c>scanelf</c> is 
here to save the day.  Before we dive into source code, let's check out a few 
libraries.
</p>

</body>
</section>

<section>
<title>Dissect libsmpeg</title>
<body>

<pre caption="Scan libsmpeg">
<comment>[The output has been truncated from about 35 lines]</comment>
$ <i>scanelf -qT /usr/lib/libsmpeg-0.4.so.0.1.3</i>
  libsmpeg-0.4.so.0.1.3: (memory/fake?) [0x2FB3C] in cpu_flags [0x2FB10]
  libsmpeg-0.4.so.0.1.3: (memory/fake?) [0x2FB42] in cpu_flags [0x2FB10]
  libsmpeg-0.4.so.0.1.3: (memory/fake?) [0x2FB55] in IDCT_mmx [0x2FB48]
  libsmpeg-0.4.so.0.1.3: (memory/fake?) [0x2FB84] in IDCT_mmx [0x2FB48]
  /usr/lib/libsmpeg-0.4.so.0.1.3
</pre>

<p>
The output here tells us that the <e>cpu_flags</e> and the <e>IDCT_mmx</e> 
functions are to blame for our TEXTRELs.  The first field indicates that this 
is poor usage of memory references.  Unfortunately, the symbolic name of the 
memory being referenced has not been retained in the object code (probably 
because the code is hand written assembly), so we need to do a little more 
digging.  This is where the offset addresses come in to play along with the 
<c>objdump</c> utility from the <e>binutils</e> package.  The first address 
(e.g. 0x2FB3C) is the offset of the TEXTREL while the second address is the 
offset of the function (e.g. 0x2FB10).  Get used to this because the behavior 
of not retaining the symbol name is quite common.
</p>

<pre caption="Dissecting libsmpeg">
$ <i>objdump -d /usr/lib/libsmpeg-0.4.so.0.1.3</i>
...
   2fb0f:       90                      nop

<i>0002fb10 &lt;cpu_flags&gt;:</i>
   2fb10:       9c                      pushf
   2fb11:       58                      pop    %eax
...
   2fb32:       60                      pusha
   2fb33:       b8 01 00 00 00          mov    $0x1,%eax
   2fb38:       0f a2                   cpuid
   <i>2fb3a:       89 15 d0 d3 03 00       mov    %edx,0x3d3d0</i>
   2fb40:       61                      popa
   <i>2fb41:       a1 d0 d3 03 00          mov    0x3d3d0,%eax</i>
   2fb46:       c3                      ret
   2fb47:       90                      nop
...
</pre>

<p>
As you can see here, the two lines picked out in the body of <e>cpu_flags</e> 
have absolute memory references.  In this case, they both refer to memory 
location <e>0x3d3d0</e>.  Since this object code may be loaded into any 
address, using an absolute reference obviously won't fly.  That means 
every time libsmpeg is loaded into memory, the dynamic loader has to rewrite 
the <e>0x3d3d0</e> to the actual calculated address on the fly.
</p>

</body>
</section>

<section>
<title>Dissect libdv</title>
<body>

<pre caption="Scan libdv">
<comment>[The output has been truncated from about 180 lines]</comment>
$ <i>scanelf -qT /usr/lib/libdv.so.4.0.2</i>
  libdv.so.4.0.2: (memory/fake?) [0x14AA9] in dv_parse_ac_coeffs_pass0 [0x14A84]
  libdv.so.4.0.2: (memory/fake?) [0x14C28] in dv_parse_ac_coeffs_pass0 [0x14A84]
  libdv.so.4.0.2: (memory/fake?) [0x14C8A] in dv_parse_video_segment [0x14C6F]
  libdv.so.4.0.2: (memory/fake?) [0x14CA6] in dv_parse_video_segment [0x14C6F]
  libdv.so.4.0.2: (memory/fake?) [0x15248] in _dv_idct_block_mmx [0x15210]
  libdv.so.4.0.2: (memory/fake?) [0x152BE] in _dv_idct_block_mmx [0x15210]
  libdv.so.4.0.2: (memory/fake?) [0x1583D] in _dv_dct_88_block_mmx [0x157F8]
  libdv.so.4.0.2: (memory/fake?) [0x15847] in _dv_dct_88_block_mmx [0x157F8]
  libdv.so.4.0.2: (memory/fake?) [0x15F91] in _dv_dct_248_block_mmx [0x15F58]
  libdv.so.4.0.2: (memory/fake?) [0x15FE6] in _dv_dct_248_block_mmx [0x15F58]
  libdv.so.4.0.2: (memory/fake?) [0x163D3] in _dv_rgbtoycb_mmx [0x163C8]
  libdv.so.4.0.2: (memory/fake?) [0x163DD] in _dv_rgbtoycb_mmx [0x163C8]
  libdv.so.4.0.2: dv_vlc_class_index_mask [0x149A7] in dv_decode_vlc [0x14998]
  libdv.so.4.0.2: dv_vlc_class_index_rshift [0x149B0] in dv_decode_vlc [0x14998]
  libdv.so.4.0.2: dv_vlc_classes [0x149B9] in dv_decode_vlc [0x14998]
  libdv.so.4.0.2: dv_vlc_index_mask [0x149C4] in dv_decode_vlc [0x14998]
  libdv.so.4.0.2: sign_mask [0x149EB] in dv_decode_vlc [0x14998]
  libdv.so.4.0.2: sign_mask [0x14A5D] in __dv_decode_vlc [0x14A1C]
  libdv.so.4.0.2: sign_mask [0x14B82] in dv_parse_ac_coeffs_pass0 [0x14A84]
  libdv.so.4.0.2: dv_vlc_class_lookup5 [0x14A2F] in __dv_decode_vlc [0x14A1C]
  libdv.so.4.0.2: dv_parse_ac_coeffs_pass0 [0x14E03] in dv_parse_video_segment [0x14C6F]
  libdv.so.4.0.2: dv_parse_ac_coeffs [0x14E51] in dv_parse_video_segment [0x14C6F]
  libdv.so.4.0.2: dv_quant_offset [0x14E69] in _dv_quant_88_inverse_x86 [0x14E5C]
  libdv.so.4.0.2: dv_quant_offset [0x14FB3] in _dv_quant_x86 [0x14FA4]
  /usr/lib/libdv.so.4.0.2
</pre>

<p>
Again, we can see that many functions (like <e>dv_parse_ac_coeffs_pass0</e> 
and <e>_dv_idct_block_mmx</e>) have absolute memory references.  What we also 
see is a bunch of functions which refer to variables.  For example, 
<e>dv_decode_vlc</e> misuses the variable <e>dv_vlc_class_index_mask</e> while 
<e>dv_parse_video_segment</e> misuses the variable <e>dv_parse_ac_coeffs</e>.  
Much easier to locate the problem in the source code when you have the symbol 
name.
</p>

</body>
</section>

<section>
<title>Dissect libSDL</title>
<body>

<pre caption="Scan libSDL">
<comment>[The output has been truncated from about 50 lines]</comment>
$ <i>scanelf -qT /usr/lib/libSDL-1.2.so.0.7.2</i>
  libSDL-1.2.so.0.7.2: (memory/fake?) [0x4E213] in _ConvertMMXpII32_24RGB888 [0x4E210]
  libSDL-1.2.so.0.7.2: (memory/fake?) [0x4E29E] in _ConvertMMXpII32_16RGB565 [0x4E29B]
  libSDL-1.2.so.0.7.2: (memory/fake?) [0x4E3F6] in _ConvertMMXpII32_16BGR555 [0x4E3F3]
  libSDL-1.2.so.0.7.2: (memory/fake?) [0x4E402] in _ConvertMMXpII32_16RGB555 [0x4E3FF]
  libSDL-1.2.so.0.7.2: (memory/fake?) [0x4E555] in _Hermes_X86_CPU [0x4E529]
  libSDL-1.2.so.0.7.2: _copy_row [0x316A2] in SDL_SoftStretch [0x313C0]
  libSDL-1.2.so.0.7.2: _mmxreturn [0x4E4FB] in _ConvertMMXpII32_16RGB555 [0x4E3FF]
  libSDL-1.2.so.0.7.2: _x86return [0x4E590] in _ConvertX86p16_16BGR565 [0x4E560]
  libSDL-1.2.so.0.7.2: _x86return [0x4EE99] in _ConvertX86p32_16BGR555 [0x4EDCA]
  libSDL-1.2.so.0.7.2: _x86return [0x4EF4D] in _ConvertX86p32_8RGB332 [0x4EE9D]
  /usr/lib/libSDL-1.2.so.0.7.2
</pre>

<p>
Doesn't seem to be anything new here.  Poor memory usage in functions like 
<e>_ConvertMMXpII32_24RGB888</e> and no symbol name which means it's probably 
pure hand written assembler.  The <e>SDL_SoftStretch</e> function misuses the 
symbol <e>_copy_row</e> and since the symbol name has been retained, it's 
probably inline assembly code.
</p>

</body>
</section>
</chapter>

<chapter>
<title>Finding the broken source code</title>
<section>
<body>

<p>
We've identified the functions and sometimes the variables which are causing 
us such headaches.  Before we can actually fix them though, we have to narrow 
down the source code to the offending lines.  Since we know the function 
names and either the symbol name or a relative position in the function, we 
should be able to focus our efforts quite easily.
</p>

</body>
</section>

<section>
<title>libsmpeg source dive</title>
<body>

<p>
Let's start with libsmpeg.  We know that both the <e>cpu_flags</e> and 
<e>IDCT_mmx</e> functions are broken.  But where are they defined?
</p>

<pre caption="Searching libsmpeg source">
$ <i>tar zxf smpeg-0.4.4.tar.gz</i>
$ <i>cd smpeg-0.4.4</i>

<comment>[Find the cpu_flags function]</comment>
$ <i>grep -Rl cpu_flags *</i>
video/mmxflags_asm.S
video/parseblock.cpp
$ <i>grep cpu_flags video/mmxflags_asm.S</i>
.globl cpu_flags
        .type    cpu_flags,@function <comment>&lt;-- here is what we want</comment>
cpu_flags:
        jz cpu_flags.L1   # Processor is 386
        je cpu_flags.L1
cpu_flags.L1:
        .size    cpu_flags,.Lfe1-cpu_flags

<comment>[Find the IDCT_mmx function]</comment>
$ <i>grep -Rl IDCT_mmx *</i>
video/parseblock.cpp
video/mmxidct_asm.S
$ <i>grep IDCT_mmx video/mmxidct_asm.S</i>
.globl IDCT_mmx
        .type    IDCT_mmx,@function <comment>&lt;-- here is what we want</comment>
IDCT_mmx:
        .size    IDCT_mmx,.Lfe1-IDCT_mmx
</pre>

<p>
As we suspected, both the <e>cpu_flags</e> and the <e>IDCT_mmx</e> functions 
are written in pure assembly code.  This makes tracking down the unnamed 
memory reference easier because the source code should closely match the 
output of <c>objdump</c>.  If we review the output from earlier, we know the 
<e>cpuid</e> instruction is used.  Since it isn't a common instruction, we 
search for it in the source code.
</p>

<pre caption="Find cpuid in cpu_flags">
$ <i>grep -A 8 cpuid video/mmxflags_asm.S</i>
        cpuid

        movl %edx,flags

        popa

        movl flags,%eax

cpu_flags.L1:
</pre>

<p>
In GNU assembler, registers are prefixed with a <e>%</e> and constants are 
prefixed with a <e>$</e>, so the bare <e>flags</e> symbol looks suspicious.  It also lines 
up well with the <c>objdump</c> output from earlier.  So what is <e>flags</e>?
</p>

<pre caption="What is 'flags'?">
$ <i>grep -C 2 flags video/mmxflags_asm.S</i>
.data
        .align 16
        .type    flags,@object
flags: .long 0

.text
</pre>

<p>
Seems <e>flags</e> is a data variable local to <path>mmxflags_asm.S</path> 
which functions access with absolute memory references rather than relative.  
Now we are pretty much done.  That's all there is to it.  We started with the 
library <path>libsmpeg.so</path> and tracked it back to the function 
<e>cpu_flags</e> and the variable <e>flags</e> in the 
<path>video/mmxflags_asm.S</path> file.  That wasn't so hard now was it? :)
</p>

<p>
If we analyze the <e>IDCT_mmx</e> function, we find a similar trend.
</p>

<pre caption="IDCT_mmx snippets">
<comment>[Local variables]</comment>
.data
    .align 8
    .type   x4546454645464546,@object
    .size   x4546454645464546,8
<i>x4546454645464546</i>:
    .long   0x45464546,0x45464546

    .align 8
    .type   x61f861f861f861f8,@object
    .size   x61f861f861f861f8,8
<i>x61f861f861f861f8</i>:
    .long   0x61f861f8,0x61f861f8

    .align 8
    .type    scratch1,@object
    .size    scratch1,8
<i>scratch1</i>:
    .long 0,0

<comment>[Absolute memory references]</comment>
.text
...
    psraw $1, %mm5          /* t90=t93 */
    pmulhw <i>x4546454645464546</i>,%mm0   /* V35 */
    psraw $1, %mm2          /* t97 */
...
    psubsw %mm2, %mm1       /* V32 ; free mm2 */
    pmulhw <i>x61f861f861f861f8</i>,%mm1   /* V36 */
    psllw $1, %mm7          /* t107 */
...
    movq 8*3(%esi), %mm7
    psraw $4, %mm4
    movq %mm2, <i>scratch1</i>     /* out1 */
</pre>

</body>
</section>

<section>
<title>libSDL source dive</title>
<body>

<p>
Again, before we jump into how to fix these, let's analyze a few more source 
files to get a better handle on identifying problematic code.
</p>

<pre caption="Broken _ConvertMMXpII32_24RGB888 in libSDL code">
<comment>[objdump of _ConvertMMXpII32_24RGB888 memory reference]</comment>
<i>0004e210 &lt;_ConvertMMXpII32_24RGB888&gt;:</i>
   <i>4e210:       0f 6f 35 50 55 05 00    movq   0x55550,%mm6</i>
   4e217:       0f ef ff                pxor   %mm7,%mm7

<comment>[_ConvertMMXpII32_24RGB888 is defined in src/hermes/mmxp2_32.asm]</comment>
	SECTION .data
ALIGN 8
;; Constants for conversion routines
mmx32_rgb888_mask dd 00ffffffh,00ffffffh
...
	SECTION .text
_ConvertMMXpII32_24RGB888: <comment>start of function 0x4E210</comment>
        ; set up mm6 as the mask, mm7 as zero
        movq mm6, qword [mmx32_rgb888_mask] <comment>memory reference 0x4E213</comment>
        pxor mm7, mm7
</pre>

<p>
Simple enough, the <e>_ConvertMMXpII32_24RGB888</e> function refers to the 
<e>mmx32_rgb888_mask</e> variable.
</p>

<pre caption="Broken SDL_SoftStretch in libSDL code">
<comment>[SDL_SoftStretch is defined in src/video/SDL_stretch.c]</comment>
int SDL_SoftStretch(SDL_Surface *src, SDL_Rect *srcrect,
                    SDL_Surface *dst, SDL_Rect *dstrect)
{
...
#ifdef __GNUC__
            __asm__ __volatile__ (
            "call _copy_row"
            : "=&amp;D" (u1), "=&amp;S" (u2)
            : "0" (dstp), "1" (srcp)
            : "memory" );
#else
</pre>

<p>
Another straightforward bug.  An absolute reference to the <e>_copy_row</e> 
variable in assembly.  If we were to let gcc handle the <e>_copy_row</e> 
reference instead though ...
</p>

</body>
</section>
</chapter>

<chapter>
<title>How to write PIC (in theory)</title>

<section>
<title>Rules of thumb</title>
<body>

<p>
Now we know what broken code looks like.  We can point out issues in code and 
confidently declare "that crap is broken".  While this is a good thing, it 
certainly doesn't help much if no one knows how it's supposed to be written.  
Let's start with some rules of thumb.
</p>

<p>General rules</p>
<ul>
 <li>Do not mix PIC and non-PIC object code</li>
 <li>Shared libraries contain PIC objects</li>
 <li>Static libraries contain non-PIC objects (normal/non-PIE systems only)</li>
 <li>Let gcc figure out the details whenever possible (e.g. inline asm)</li>
 <li>Use the stack for loading of large masks instead of variables</li>
 <li>Do not clobber the PIC register when generating PIC objects</li>
</ul>

<p>x86-specific rules</p>
<ul>
 <li>Use @GOT relocations when using external symbols</li>
 <li>Use @GOTOFF relocations when using local symbols</li>
</ul>

</body>
</section>

<section>
<title>PIC registers by architecture</title>
<body>

<table>
 <tr><th>arch</th><th>register</th></tr>
 <tr><ti>blackfin</ti><ti>P3</ti></tr>
 <tr><ti>frv</ti><ti>GR15</ti></tr>
 <tr><ti>hppa</ti><ti>r19</ti></tr>
 <tr><ti>x86</ti><ti>ebx</ti></tr>
</table>

</body>
</section>
</chapter>

<chapter>
<title>Cookie cutter PIC fixes</title>

<section>
<title>Don't use the PIC register</title>
<body>

<p>
If you come across code which uses the PIC register in some inline assembly, 
one fix may be to simply use a different register.  For example, the x86 
architecture has 6 general purpose registers (<e>eax</e>, <e>ebx</e>, 
<e>ecx</e>, <e>edx</e>, <e>esi</e>, <e>edi</e>).  If the code uses just 
<e>eax</e> and <e>ebx</e>, just change all references to <e>ebx</e> to 
<e>ecx</e> and you're done!
</p>

<p>
A cleaner fix might be to just let gcc allocate the registers accordingly.  If 
the inline assembly doesn't actually care which registers it uses, change the 
references from <e>ebx</e> to <e>r</e> in the constraint list, and refer to the 
variable by number.
</p>

<p>
Or, if the assembly uses an instruction which always clobbers <e>ebx</e> (e.g. 
<e>cpuid</e>), simply hide the value in another register (like <e>edi</e>).
</p>

<p>
If all else fails, you can fall back to the slow push/pop <e>ebx</e> on the 
stack method.
</p>

<pre caption="Just don't use the PIC register">
<comment>/* change this code from */</comment>
asm("
    mov %0, %%eax
    mov %1, %%ebx
    add %%eax, %%ebx
    " : : "m"(input1), "m"(input2) : "eax", "ebx");

<comment>/* to this functionally equivalent version */</comment>
asm("
    mov %0, %%eax
    mov %1, %%ecx
    add %%eax, %%ecx
    " : : "m"(input1), "m"(input2) : "eax", "ecx");
</pre>

<pre caption="Let gcc allocate registers">
<comment>/* change this code from */</comment>
asm("
    mov %2, %%eax
    mov %3, %%ebx
    add %%eax, %%ebx
    " : "=a"(output1), "=b"(output2) : "m"(input1), "m"(input2));

<comment>/* to this functionally equivalent version */</comment>
asm("
    mov %2, %0
    mov %3, %1
    add %0, %1
    " : "=r"(output1), "=r"(output2) : "m"(input1), "m"(input2));
</pre>

<pre caption="Hide the PIC register">
asm("cpuid" : : : "eax", "ebx", "ecx", "edx");

<comment>/* can be written to hide ebx */</comment>
asm("
    movl %%ebx, %%edi
    cpuid
    movl %%edi, %%ebx
    " : : : "eax", "ecx", "edx", "edi");

<comment>/* or a slower version using the stack */</comment>
asm("
    pushl %%ebx
    cpuid
    popl %%ebx
    " : : : "eax", "ecx", "edx");
</pre>

</body>
</section>

<section>
<title>MMX/SSE masks</title>
<body>

<p>
A lot of x86 MMX/SSE code loads bitmasks from local variables since they need 
to fill up a register which is larger (MMX/64bits or SSE/128bits) than the 
native bitsize (x86/32bits).  They do this by defining the mask in 
consecutive bytes in memory and then having the cpu load the data from the 
memory region.
</p>

<p>
One way to get around this is by being creative with the stack.  Rather than 
use an absolute memory reference for the mask, push a bunch of 32bit values 
onto the stack and use the address specified by the <e>esp</e> register.  
Once you're finished, just add a constant to <e>esp</e> rather than popping 
off since you don't care about the actual values once they are loaded into 
the MMX/SSE registers.
</p>

<pre caption="Load masks into registers via stack">
<comment>/* Load masks from memory (causes TEXTRELs) */</comment>
	.data
m0X000000: .byte   0,   0,   0,   0,   0,   0, 255,   0
	.text
movq	m0X000000, %mm5

<comment>/* Load mask from stack (no TEXTRELs)*/</comment>
pushl	$0x00FF0000
pushl	$0x00000000
movq	(%esp), %mm5
addl	$8, %esp
</pre>

</body>
</section>

<section>
<title>Let gcc worry about it</title>
<body>

<p>
A lot of inline assembly is written with the symbol names placed right in the 
code.  Rather than trying to write custom code to handle PIC in assembly, just 
let gcc worry about it.  Pass in the symbol via the input operand list as a 
memory constraint ("m") and gcc will handle all the rest.
</p>

<pre caption="How to make gcc worry about it">
unsigned long long a_mmx_mask = 0xf8007c00ffea0059ULL;
void somefunction()
{
	<comment>/* Common (but incorrect) method for loading masks */</comment>
	asm("pmullw a_mmx_mask, %%mm0" : : );

	<comment>/* The correct way is to let gcc do it */</comment>
	asm("pmullw %0, %%mm0" : : "m"(a_mmx_mask));
}
</pre>

<p>
If you get a warning/error about one of the memory inputs needing to be an 
lvalue, then this usually means you're trying to pass in a pointer to an 
array/structure rather than the memory location itself.  Fixing this may be as 
simple as dereferencing the variable in the constraint list rather than in the 
assembly itself.
</p>

</body>
</section>

<section>
<title>Thunk it in assembly</title>
<body>

<p>
Hand written assembly sometimes needs to access variables (whether they be 
local or global).  Since none of the previous tricks will work, you just need 
to grind your teeth and dig in to write real PIC references yourself using 
the GOT.  Make sure you keep in mind the first rule of thumb: Do not mix PIC 
and non-PIC object code.  This probably will require the hand written 
assembly be preprocessed before it is assembled, so an assembly source file 
with a <e>.s</e> suffix will not work.  It needs to be <e>.S</e>.
</p>

<p>
Also keep in mind that using @GOTOFF will return the variable while using @GOT 
will return a pointer to the variable.  So accessing a variable with @GOT will 
require two steps.
</p>

<pre caption="How to refer to variables via the GOT">
#ifdef __PIC__

# undef __i686 /* gcc builtin define gets in our way */
# define MUNG_LOCAL(sym)   sym ## @GOTOFF(%ebx)
# define MUNG_EXTERN(sym)  sym ## @GOT(%ebx)
# define DEREF_EXTERN(reg) movl (reg), reg
# define INIT_PIC() \
	call __i686.get_pc_thunk.bx ; \
	addl $_GLOBAL_OFFSET_TABLE_, %ebx

#else

# define MUNG_LOCAL(sym)   sym
# define MUNG_EXTERN(sym)  sym
# define DEREF_EXTERN(reg)
# define INIT_PIC()

#endif

...
some_function:
...
	<comment>/* needs to be before first memory reference */</comment>
	INIT_PIC()
...
	movl MUNG_EXTERN(some_external_variable), %eax
	DEREF_EXTERN(%eax)
...
	movl %eax, MUNG_LOCAL(some_local_variable)
...

#ifdef __PIC__
	.section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
.globl __i686.get_pc_thunk.bx
	.hidden  __i686.get_pc_thunk.bx
	.type    __i686.get_pc_thunk.bx,@function
__i686.get_pc_thunk.bx:
	movl (%esp), %ebx
	ret
#endif
</pre>

<note>
Usage of <e>ebx</e> as the register to do relative addressing is not required, 
it is just common convention.  The above examples could just as easily be done 
by using <e>ecx</e> or <e>edx</e>.
</note>

<p>
Since we hide the PIC details behind the preprocessor define <e>__PIC__</e>, 
we know that the correct code will be generated for both the PIC and non-PIC 
cases.
</p>

<p>
The <e>__i686.get_pc_thunk.bx</e> function is a standard method for acquiring 
the address of the GOT at runtime and storing the result in <e>ebx</e>.  The 
funky name is what gcc uses by convention when generating PIC objects, so we 
too use the same name.  The <e>@GOT</e> and <e>@GOTOFF</e> notation tells the 
assembler where to find the variables in memory.  The <e>.section 
.gnu.linkonce.t</e> is useful because it tells the linker to only include one 
instance of this function in the final object code.  So if you have multiple 
files which declare this same function which are compiled and linked into the 
same final library, the linker will discard all duplicate instances of the 
function thus saving space (which is always a good thing).
</p>

</body>
</section>
</chapter>

<chapter>
<title>How to fix broken PIC (in practice)</title>
<section>
<body>

<p>
So if the previous code snippets were broken, what should they look like you 
may wonder.  Well let's find out.
</p>

</body>
</section>

<section>
<title>Fix libsmpeg</title>
<body>

<pre caption="Fixing cpu_flags in libsmpeg by rewriting it">
<comment>[Non-PIC Version]</comment>
.type flags,@object
flags: .long 0
...
	pusha
	movl $1,%eax
	cpuid
	movl %edx,flags
	popa
	movl flags,%eax


<comment>[PIC Version]</comment>
	pushl %ebx
	movl $1,%eax
	cpuid
	movl %edx,%eax
	popl %ebx
</pre>

<pre caption="Fixing IDCT_mmx in libsmpeg by using relative addressing">
<comment>[Non-PIC Version]</comment>
	pmulhw x5a825a825a825a82, %mm1


<comment>[PIC Version]</comment>
#ifdef __PIC__
# undef __i686 /* gcc define gets in our way */
	call __i686.get_pc_thunk.bx
	addl $_GLOBAL_OFFSET_TABLE_, %ebx
#endif
...
	pmulhw x5a825a825a825a82@GOTOFF(%ebx), %mm1
...
#ifdef __PIC__
	.section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
.globl __i686.get_pc_thunk.bx
	.hidden  __i686.get_pc_thunk.bx
	.type    __i686.get_pc_thunk.bx,@function
__i686.get_pc_thunk.bx:
	movl (%esp), %ebx
	ret
#endif
</pre>

</body>
</section>

<section>
<title>Fix libSDL</title>
<body>

<pre caption="Fixing _ConvertMMXpII32_24RGB888 in libSDL">
<comment>[Non-PIC Version]</comment>
mmx32_rgb888_mask dd 00ffffffh,00ffffffh
...
	movq mm6, qword [mmx32_rgb888_mask]


<comment>[PIC Version]</comment>
%macro _push_immq_mask 1
	push dword %1
	push dword %1
%endmacro
%macro load_immq 2
	_push_immq_mask %2
	movq %1, [esp]
%endmacro
%define mmx32_rgb888_mask 00ffffffh
...
	load_immq mm6, mmx32_rgb888_mask
	CLEANUP_IMMQ_LOADS(1)
</pre>

<pre caption="Fixing SDL_SoftStretch in libSDL">
<comment>[Non-PIC Version]</comment>
	__asm__ __volatile__ (
		"call _copy_row"
		: "=&amp;D" (u1), "=&amp;S" (u2)
		: "0" (dstp), "1" (srcp)
		: "memory" );


<comment>[PIC Version]</comment>
	__asm__ __volatile__ (
		"call *%4"
		: "=&amp;D" (u1), "=&amp;S" (u2)
		: "0" (dstp), "1" (srcp), "r" (&amp;_copy_row)
		: "memory" );
</pre>

</body>
</section>
</chapter>

<chapter>
<title>References</title>
<section>
<body>

<ul>
 <li>thanks to the PaX team for holding my hand</li>
 <li><uri link="http://www.ibiblio.org/gferg/ldp/GCC-Inline-Assembly-HOWTO.html">GCC Inline Assembly HOWTO</uri></li>
 <li><uri link="http://nasm.sourceforge.net/">NASM</uri>'s Documentation on <uri link="http://nasm.sourceforge.net/doc/html/nasmdoc6.html#section-6.5.2">ELF shared libraries</uri></li>
 <li>Linkers and Loaders <uri link="http://www.iecc.com/linker/linker08.html">chapter 8</uri> and <uri link="http://www.iecc.com/linker/linker10.html">chapter 10</uri></li>
</ul>

</body>
</section>

</chapter>
</guide>