-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Change extraction method using API and prepare package for v2.1 release
- Loading branch information
1 parent
a0c9f0f
commit 5095b52
Showing
15 changed files
with
368 additions
and
83 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,5 +3,7 @@ __pycache__/ | |
*$py.class | ||
|
||
/src/lib | ||
/src/workflow | ||
/venv | ||
|
||
.python-version |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
#!/bin/sh
# Build script: vendor the workflow's Python dependencies into src/.
# - Alfred-Workflow goes directly into src/ (the workflow root).
# - Everything in src/requirements.txt goes into src/lib/.
set -e                                   # abort on the first failing step
pip install --target=src Alfred-Workflow --upgrade
rm -rf src/*.dist-info                   # -f: don't fail when no .dist-info exists yet
mkdir -p src/lib                         # -p: idempotent across repeated builds
pip install -r src/requirements.txt --target=src/lib --upgrade
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,38 +1,120 @@ | ||
from bs4 import BeautifulSoup | ||
import requests | ||
import logging | ||
import json | ||
import os | ||
|
||
class PowerThesaurus:
    """Client for the Powerthesaurus GraphQL API.

    Posts GraphQL queries (loaded from files on disk) to ``api_url`` and
    builds public website links from ``web_url``.
    """

    # Sent with every request; identifies this workflow release to the API.
    USER_AGENT = "Alfred-Powerthesaurus/2.1.0"
    # Keys into self.gql_queries — filenames (minus extension) in gql_queries_dir.
    GQL_THESAURUS_QUERY = "thesaurus_query"
    GQL_SEARCH_QUERY = "search_query"

    def __init__(self, api_url, web_url, gql_queries_dir="./gql_queries/", pos_file_path="./pos.json", logger=logging):
        """Load GraphQL queries and the part-of-speech table, prepare headers.

        :param api_url: GraphQL endpoint URL (POST target).
        :param web_url: base URL used to build user-facing result links.
        :param gql_queries_dir: directory containing one .graphql file per query.
        :param pos_file_path: JSON file with the part-of-speech lookup list.
        :param logger: anything with a ``debug`` method; defaults to ``logging``.
        """
        self.api_url = api_url
        self.web_url = web_url
        self.logger = logger
        self.gql_queries = self.read_gql_queries(gql_queries_dir)
        self.pos_mapping = self.read_pos_mapping(pos_file_path)
        self.request_headers = self.build_request_headers()
||
def build_url(self, slug, query_type):
    """Return the public web page URL for *slug*'s *query_type* list.

    Example: <web_url>/happy/synonyms
    """
    parts = (self.web_url, slug, query_type)
    return '/'.join('{}'.format(p) for p in parts)
|
||
def build_request_headers(self):
    """Return the HTTP headers sent with every GraphQL API request.

    The API is always called with a JSON body (see search_query /
    thesaurus_query), hence the fixed content-type.
    """
    return {
        # Identify this workflow build to the API.
        "user-agent": PowerThesaurus.USER_AGENT,
        "content-type": "application/json"
    }
|
||
def read_pos_mapping(self, file_path):
    """Load the part-of-speech lookup table from *file_path*.

    The file holds a JSON list of part-of-speech objects, each with an
    ``id`` key; the result maps that id to the whole object.

    :param file_path: path to the JSON file (e.g. ``./pos.json``).
    :return: dict mapping pos id -> pos object.
    """
    with open(file_path, 'r') as file:
        pos_list = json.load(file)  # json.load reads the stream directly
    return {pos['id']: pos for pos in pos_list}
|
||
def read_gql_queries(self, dir):
    """Load every GraphQL query file in *dir* into a dict.

    Keys are the filenames without extension (these must match the
    GQL_* class constants); values are the raw file contents.

    :param dir: directory containing the .graphql query files.
    :return: dict mapping query name -> query text.
    """
    gql_queries = {}
    for filename in os.listdir(dir):
        file_path = os.path.join(dir, filename)
        # Skip subdirectories etc. — open() on a directory would raise.
        if not os.path.isfile(file_path):
            continue
        # Filename without extension is the lookup key.
        key = os.path.splitext(filename)[0]
        with open(file_path, 'r') as file:
            gql_queries[key] = file.read()
    return gql_queries
|
||
def build_search_query_params(self, query):
    """Build the JSON payload for a SEARCH_QUERY GraphQL request.

    :param query: free-text search string typed by the user.
    :return: dict ready to be posted as the request's JSON body.
    """
    return {
        "operationName": "SEARCH_QUERY",
        "variables": {
            "query": query
        },
        # Query text was loaded from gql_queries_dir at construction time.
        "query": self.gql_queries[PowerThesaurus.GQL_SEARCH_QUERY]
    }
|
||
def build_thesaurus_query_params(self, term_id, query_type):
    """Build the JSON payload for a THESAURUSES_QUERY GraphQL request.

    :param term_id: id of the term whose related words are wanted.
    :param query_type: list name, e.g. 'synonyms' or 'antonyms'
        (upper-cased for the API's ``List`` enum).
    :return: dict ready to be posted as the request's JSON body.
    """
    return {
        "operationName": "THESAURUSES_QUERY",
        "variables": {
            "list": query_type.upper(),
            "termID": term_id,
            # Best-rated entries first.
            "sort": {
                "field": "RATING",
                "direction": "DESC"
            },
            "limit": 50,
            # Unused filters are sent explicitly as null.
            "syllables": None,
            "query": None,
            "posID": None,
            # Cursor pagination: first page of up to 50 edges.
            "first": 50,
            "after": ""
        },
        "query": self.gql_queries[PowerThesaurus.GQL_THESAURUS_QUERY]
    }
|
||
def parse_thesaurus_query_response(self, response):
    """Flatten a THESAURUSES_QUERY response into plain result dicts.

    Uses list comprehensions rather than ``map`` so the result is an
    indexable, JSON-serializable list on Python 3 as well (Python 2's
    ``map`` returned a list; Python 3's returns a lazy iterator).

    :param response: decoded JSON body of the API response.
    :return: list of dicts with term info, counters and web URLs.
    """
    edges = response['data']['thesauruses']['edges']
    nodes = [edge['node'] for edge in edges]
    return [{
        'id': node['targetTerm']['id'],
        'word': node['targetTerm']['name'],
        'slug': node['targetTerm']['slug'],
        # Translate pos ids into their short display names.
        'parts_of_speech': [self.pos_mapping[p]['shorter'] for p in node['relations']['parts_of_speech']],
        'tags': node['relations']['tags'],
        'synonyms_count': node['targetTerm']['counters']['synonyms'],
        'antonyms_count': node['targetTerm']['counters']['antonyms'],
        'rating': node['rating'],
        'url_synonyms': self.build_url(node['targetTerm']['slug'], 'synonyms'),
        'url_antonyms': self.build_url(node['targetTerm']['slug'], 'antonyms')
    } for node in nodes]
|
||
def thesaurus_query(self, term_id, query_type):
    """Fetch the *query_type* list (e.g. synonyms) for *term_id*.

    Returns an empty list straight away for a falsy term id; otherwise
    posts the GraphQL query and returns the parsed results. Raises
    ``requests.HTTPError`` on a non-2xx status.
    """
    if not term_id:
        return []
    payload = self.build_thesaurus_query_params(term_id, query_type)
    response = requests.post(self.api_url, json=payload, headers=self.request_headers)
    self.logger.debug('thesaurus_query: {} {}'.format(response.status_code, response.url))
    response.raise_for_status()
    return self.parse_thesaurus_query_response(response.json())
|
||
def parse_search_query_response(self, response):
    """Extract the matched terms from a SEARCH_QUERY response.

    Uses a list comprehension instead of ``map`` so callers (see
    search_query_match, which truth-tests and indexes the result) get a
    real list on Python 3 too — a Py3 ``map`` object is always truthy
    and not indexable.

    :param response: decoded JSON body of the API response.
    :return: list of ``{'id': ..., 'word': ...}`` dicts.
    """
    terms = response['data']['search']['terms']
    return [{
        'id': term['id'],
        'word': term['name'],
    } for term in terms]
|
||
def search_query(self, query):
    """Run a SEARCH_QUERY against the API and return the matching terms.

    :param query: free-text search string.
    :return: parsed terms (see parse_search_query_response).
    :raises requests.HTTPError: on a non-2xx response status.
    """
    params = self.build_search_query_params(query)
    r = requests.post(self.api_url, json=params, headers=self.request_headers)
    self.logger.debug('search_query: {} {}'.format(r.status_code, r.url))
    r.raise_for_status()
    return self.parse_search_query_response(r.json())
|
||
def search_query_match(self, query):
    """Return the top search hit only if its word equals *query* exactly.

    :param query: the exact word to match.
    :return: the first term dict when it is an exact match, else None.
    """
    hits = self.search_query(query)
    if hits and hits[0]['word'] == query:
        return hits[0]
    return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Free-text term search.
# Returns only the id and display name of each hit; the id feeds the
# THESAURUSES_QUERY and the name is compared against the user's input.
query SEARCH_QUERY($query: String!) {
  search(query: $query) {
    terms {
      id
      name
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# Fetch one cursor-paginated page of thesaurus entries (the $list enum
# selects e.g. synonyms vs antonyms) for the term $termID, ordered by $sort.
# $tagID / $posID / $syllables are optional server-side filters.
query THESAURUSES_QUERY($after: String, $first: Int, $before: String, $last: Int, $termID: ID!, $list: List!, $sort: ThesaurusSorting!, $tagID: Int, $posID: Int, $syllables: Int) {
  thesauruses(termId: $termID, sort: $sort, list: $list, after: $after, first: $first, before: $before, last: $last, tagId: $tagID, partOfSpeechId: $posID, syllables: $syllables) {
    totalCount
    # Relay-style pagination bookkeeping.
    pageInfo {
      hasNextPage
      hasPreviousPage
      startCursor
      endCursor
      __typename
    }
    edges {
      node {
        _type
        id
        isPinned
        # The related word itself, with its synonym/antonym counters.
        targetTerm {
          id
          name
          slug
          counters
          isFavorite
          __typename
        }
        # Holds parts_of_speech ids and tags (see parse_thesaurus_query_response).
        relations
        rating
        vote {
          voteType
          id
          __typename
        }
        votes
        __typename
      }
      cursor
      __typename
    }
    __typename
  }
}
Oops, something went wrong.