summaryrefslogtreecommitdiff
blob: aa5cd4a3877ea1d3954f394e64419303401a9f10 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# Copyright 1999-2010 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2
# $Header: /var/cvsroot/gentoo-x86/app-text/tesseract/tesseract-2.04-r1.ebuild,v 1.7 2010/11/20 11:37:09 armin76 Exp $

# Ebuild API level this file is written against.
EAPI="2"

# The autotools eclass supplies eautoreconf, which src_unpack calls.
inherit eutils autotools

DESCRIPTION="An OCR Engine that was developed at HP and now at Google"
HOMEPAGE="http://code.google.com/p/tesseract-ocr/"
# Main source tarball (${P}.tar.gz) plus optional language data, one group per
# linguas_* USE flag. The language data comes in two forms:
#   - ${PN}-ocr-${PV}.<lang>.tar.gz tarballs (ar, en, he, hi, th)
#   - gzip-compressed <lang>.traineddata files (all other languages; en gets
#     both forms)
# da, de and sv additionally fetch a "-frak" data file; zh fetches both the
# chi_tra and chi_sim data files.
SRC_URI="http://tesseract-ocr.googlecode.com/files/${P}.tar.gz
	linguas_ar? ( http://tesseract-ocr.googlecode.com/files/${PN}-ocr-${PV}.ara.tar.gz )
	linguas_bg? ( http://tesseract-ocr.googlecode.com/files/bul.traineddata.gz )
	linguas_ca? ( http://tesseract-ocr.googlecode.com/files/cat.traineddata.gz )
	linguas_cs? ( http://tesseract-ocr.googlecode.com/files/ces.traineddata.gz )
	linguas_da? (
		http://tesseract-ocr.googlecode.com/files/dan.traineddata.gz
		http://tesseract-ocr.googlecode.com/files/dan-frak.traineddata.gz
	)
	linguas_de? (
		http://tesseract-ocr.googlecode.com/files/deu.traineddata.gz
		http://tesseract-ocr.googlecode.com/files/deu-frak.traineddata.gz
	)
	linguas_el? ( http://tesseract-ocr.googlecode.com/files/ell.traineddata.gz )
	linguas_en? (
		http://tesseract-ocr.googlecode.com/files/${PN}-ocr-${PV}.eng.tar.gz
		http://tesseract-ocr.googlecode.com/files/eng.traineddata.gz
	)
	linguas_es? ( http://tesseract-ocr.googlecode.com/files/spa.traineddata.gz )
	linguas_fi? ( http://tesseract-ocr.googlecode.com/files/fin.traineddata.gz )
	linguas_fr? ( http://tesseract-ocr.googlecode.com/files/fra.traineddata.gz )
	linguas_he? (
		http://tesseract-ocr.googlecode.com/files/${PN}-ocr-${PV}.heb.tar.gz
		http://tesseract-ocr.googlecode.com/files/${PN}-ocr-${PV}.heb-com.tar.gz
	)
	linguas_hi? ( http://tesseract-ocr.googlecode.com/files/${PN}-ocr-${PV}.hin.tar.gz )
	linguas_hu? ( http://tesseract-ocr.googlecode.com/files/hun.traineddata.gz )
	linguas_id? ( http://tesseract-ocr.googlecode.com/files/ind.traineddata.gz )
	linguas_it? ( http://tesseract-ocr.googlecode.com/files/ita.traineddata.gz )
	linguas_ja? ( http://tesseract-ocr.googlecode.com/files/jpn.traineddata.gz )
	linguas_ko? ( http://tesseract-ocr.googlecode.com/files/kor.traineddata.gz )
	linguas_lt? ( http://tesseract-ocr.googlecode.com/files/lit.traineddata.gz )
	linguas_lv? ( http://tesseract-ocr.googlecode.com/files/lav.traineddata.gz )
	linguas_nl? ( http://tesseract-ocr.googlecode.com/files/nld.traineddata.gz )
	linguas_no? ( http://tesseract-ocr.googlecode.com/files/nor.traineddata.gz )
	linguas_pl? ( http://tesseract-ocr.googlecode.com/files/pol.traineddata.gz )
	linguas_pt? ( http://tesseract-ocr.googlecode.com/files/por.traineddata.gz )
	linguas_ro? ( http://tesseract-ocr.googlecode.com/files/ron.traineddata.gz )
	linguas_ru? ( http://tesseract-ocr.googlecode.com/files/rus.traineddata.gz )
	linguas_sk? ( http://tesseract-ocr.googlecode.com/files/slk.traineddata.gz )
	linguas_sl? ( http://tesseract-ocr.googlecode.com/files/slv.traineddata.gz )
	linguas_sr@latin? ( http://tesseract-ocr.googlecode.com/files/srp.traineddata.gz )
	linguas_sv? (
		http://tesseract-ocr.googlecode.com/files/swe.traineddata.gz
		http://tesseract-ocr.googlecode.com/files/swe-frak.traineddata.gz
	)
	linguas_th? ( http://tesseract-ocr.googlecode.com/files/${PN}-ocr-${PV}.tha.tar.gz )
	linguas_tl? ( http://tesseract-ocr.googlecode.com/files/tgl.traineddata.gz )
	linguas_tr? ( http://tesseract-ocr.googlecode.com/files/tur.traineddata.gz )
	linguas_uk? ( http://tesseract-ocr.googlecode.com/files/ukr.traineddata.gz )
	linguas_vi? ( http://tesseract-ocr.googlecode.com/files/vie.traineddata.gz )
	linguas_zh? (
		http://tesseract-ocr.googlecode.com/files/chi_tra.traineddata.gz
		http://tesseract-ocr.googlecode.com/files/chi_sim.traineddata.gz
	)"

LICENSE="Apache-2.0"
SLOT="0"
KEYWORDS="~amd64 ~x86"
# USE flags:
#   examples  - src_install additionally installs the sample .tif images
#   tiff      - adds the optional media-libs/tiff dependency
#   linguas_* - one flag per language; each gates the matching language data
#               download in SRC_URI
IUSE="examples tiff linguas_ar linguas_bg linguas_ca linguas_cs linguas_da linguas_de linguas_el linguas_en linguas_es linguas_fi linguas_fr linguas_he linguas_hi linguas_hu linguas_id linguas_it linguas_ja linguas_ko linguas_lt linguas_lv linguas_nl linguas_no linguas_pl linguas_pt linguas_ro linguas_ru linguas_sk linguas_sl linguas_sr@latin linguas_sv linguas_th linguas_tl linguas_tr linguas_uk linguas_vi linguas_zh"

# Build-time dependencies: leptonica is always required, media-libs/tiff only
# when the tiff USE flag is enabled.
DEPEND="
	media-libs/leptonica
	tiff? ( media-libs/tiff )
"
# Run-time dependencies are identical to the build-time ones.
RDEPEND="${DEPEND}"

# Unpack every distfile listed in ${A}, then regenerate the autotools build
# system inside ${S}.
#
# eautoreconf is run with AT_M4DIR pointing at the "config" directory; both
# "config" and "m4" are created first so they exist when it runs.
src_unpack() {
	unpack ${A}

	# Abort right away if the source directory cannot be entered; otherwise
	# the mkdir and eautoreconf calls below would run in the wrong directory.
	cd "${S}" || die "cd to ${S} failed"

	mkdir -p config m4 || die "creating config and m4 directories failed"

	AT_M4DIR="config" eautoreconf || die "eautoreconf failed"
}

# Stage the downloaded language data inside the source tree's tessdata/
# directory and remove the obsolete java makefile.
#
# SRC_URI ships language data in two forms:
#   - *.tar.gz tarballs, only fetched for some linguas_* flags; these are
#     presumably what populates ${WORKDIR}/tesseract-ocr/tessdata -- confirm.
#   - gzip-compressed *.traineddata files; presumably "unpack" leaves them as
#     ${WORKDIR}/<name>.traineddata -- confirm.
# Either set may be absent depending on the enabled linguas_* flags, so each
# move only happens when there is something to move.
src_prepare() {
	local tarball_data=( "${WORKDIR}"/tesseract-ocr/tessdata/* )
	local gzipped_data=( "${WORKDIR}"/*.traineddata )

	# A glob that matches nothing is left as the literal pattern (or expands
	# to nothing under nullglob); in both cases the -e test below is false.
	if [[ -e ${tarball_data[0]} ]]; then
		mv "${tarball_data[@]}" tessdata/ || die "move language files failed"
	fi

	# NOTE(review): confirm that the install rules in tessdata/ pick up
	# *.traineddata files placed there.
	if [[ -e ${gzipped_data[0]} ]]; then
		mv "${gzipped_data[@]}" tessdata/ || die "move language files failed"
	fi

	rm "${S}/java/makefile" || die "remove obsolete java makefile failed"
}

# Install the built package into the image directory ${D}, followed by the
# documentation files and, when the examples USE flag is enabled, the two
# sample .tif images under /usr/share/doc/${PF}/examples.
src_install() {
	emake DESTDIR="${D}" install \
		|| die "emake install failed"

	local doc_files=( AUTHORS ChangeLog NEWS README ReleaseNotes )
	dodoc "${doc_files[@]}" \
		|| die "dodoc failed"

	# Nothing left to do unless the sample images were requested.
	use examples || return 0

	insinto /usr/share/doc/${PF}/examples
	doins eurotext.tif phototest.tif \
		|| die "doins failed"
}