Lingua::JA::TermExtractor is a term extractor written in Perl. It extracts terms from a single document or from a collection of documents.
SYNOPSIS
# Usage sketch: build an extractor, run it over one document and over a list
# of documents, then print scored terms.
# $appid, $document and @documents are not defined in this snippet; the
# caller must supply them.
use Lingua::JA::TermExtractor;
# The option lists below contain non-ASCII (Japanese) literals.
use utf8;
# Enables say(), used in the loop at the end.
use feature qw/say/;
# Provides p(), used below to print the dumped results.
use Data::Printer;
# NOTE(review): the meaning of each constructor option is defined by the
# module and is not visible here -- check the module's documentation before
# changing any value.
my $extractor = Lingua::JA::TermExtractor->new(
api => 'YahooPremium',
appid => $appid,
fetch_df => 1,
# Presumably passed through to the Furl::HTTP client -- confirm.
Furl_HTTP => { timeout => 3 },
driver => 'TokyoTyrant',
# host:port form; presumably the address of the store named by `driver` -- confirm.
df_file => 'localhost:1978',
pos1_filter => [qw/非自立 代名詞 数 ナイ形容詞語幹 副詞可能 サ変接続/],
term_length_min => 2,
tf_min => 2,
# Underscores are only digit separators in Perl numeric literals:
# 1_0000 is 10000 and 1000_0000 is 10000000.
df_min => 1_0000,
df_max => 1000_0000,
# NOTE(review): the first entry starts with a middle dot, which looks like a
# character lost in extraction -- confirm against the upstream documentation.
ng_word => [qw/·集 本人 自身 自分 たち さん/],
fetch_unk_word_df => 0,
concat_max => 100,
);
# Extract from a single document and print the dumped result.
p $extractor->extract($document)->dump;
# extract() is also called with a reference to an array of documents.
p $extractor->extract(\@documents)->dump;
# The value returned by list(50) is dereferenced as an array whose elements
# are hash references (presumably the 50 best-scoring terms -- confirm).
for my $result (@{ $extractor->extract(\@documents)->list(50) })
{
# each() returns the first key/value pair of the hash; presumably every
# element holds exactly one word => score pair -- confirm.
my ($word, $score) = each %{$result};
say "$word: $score";
}
Product's homepage
Requirements:
· Perl