Skip to content

Commit

Permalink
Change extraction method using API and prepare package for v2.1 release
Browse files Browse the repository at this point in the history
  • Loading branch information
clarencecastillo committed Apr 8, 2020
1 parent a0c9f0f commit 5095b52
Show file tree
Hide file tree
Showing 15 changed files with 368 additions and 83 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,7 @@ __pycache__/
*$py.class

/src/lib
/src/workflow
/venv

.python-version
1 change: 0 additions & 1 deletion .python-version

This file was deleted.

2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2019 Clarence Castillo
Copyright (c) 2020 Clarence Castillo

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
Binary file removed Powerthesaurus-2.0.alfredworkflow
Binary file not shown.
Binary file added Powerthesaurus-2.1.0.alfredworkflow
Binary file not shown.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Powerthesaurus Search for Alfred #

Search for synonyms and antonyms on [Powerthesaurus.org](https://www.powerthesaurus.org) from [Alfred 3 & 4](https://www.alfredapp.com/).
Search for synonyms and antonyms on [Powerthesaurus.org](https://www.powerthesaurus.org) from [Alfred 4](https://www.alfredapp.com/).

![](demo.gif "")

Expand Down
5 changes: 5 additions & 0 deletions init.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/sh
# Vendor the workflow's Python dependencies into src/ so they ship inside the
# packaged workflow. Safe to re-run: every step tolerates a previous run.

# Abort on the first failing command instead of continuing with a
# half-populated src/ tree.
set -e

# Alfred-Workflow is installed directly into src/ (next to the workflow code).
pip install --target=src Alfred-Workflow --upgrade

# Drop the packaging metadata pip leaves behind; -f keeps this from failing
# when the glob matches nothing.
rm -rf src/*.dist-info

# -p: do not fail when src/lib already exists from an earlier run.
mkdir -p src/lib

# Remaining requirements go into src/lib.
pip install -r src/requirements.txt --target=src/lib --upgrade
124 changes: 103 additions & 21 deletions src/api.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,120 @@
from bs4 import BeautifulSoup
import requests
import logging
import json
import os

class PowerThesaurus:
    """Small client for the Power Thesaurus GraphQL endpoint.

    The GraphQL documents are loaded from text files and the part-of-speech
    lookup table from a JSON file when the client is constructed. Each query
    method POSTs a JSON payload to ``api_url`` and converts the JSON response
    into plain lists of dicts.
    """

    USER_AGENT = "Alfred-Powerthesaurus/2.1.0"
    # Keys into ``self.gql_queries``; they equal the query file names without
    # their extension (see ``read_gql_queries``).
    GQL_THESAURUS_QUERY = "thesaurus_query"
    GQL_SEARCH_QUERY = "search_query"
    # Seconds. Without an explicit timeout ``requests`` can wait forever on
    # an unresponsive server.
    REQUEST_TIMEOUT = 10

    def __init__(self, api_url, web_url, gql_queries_dir="./gql_queries/", pos_file_path="./pos.json", logger=logging):
        """Store the endpoints and load the query and part-of-speech files.

        :param api_url: URL every GraphQL request is POSTed to.
        :param web_url: base URL used by ``build_url`` for result links.
        :param gql_queries_dir: directory holding the GraphQL query files.
        :param pos_file_path: JSON file holding a list of part-of-speech
            objects, each with an ``id`` key.
        :param logger: object exposing ``debug``; defaults to the ``logging``
            module itself.
        """
        self.api_url = api_url
        self.web_url = web_url
        self.logger = logger
        self.gql_queries = self.read_gql_queries(gql_queries_dir)
        self.pos_mapping = self.read_pos_mapping(pos_file_path)
        self.request_headers = self.build_request_headers()

    def build_url(self, slug, query_type):
        """Return ``<web_url>/<slug>/<query_type>``."""
        return '{}/{}/{}'.format(self.web_url, slug, query_type)

    def build_request_headers(self):
        """Return the HTTP headers sent with every request."""
        return {
            "user-agent": PowerThesaurus.USER_AGENT,
            "content-type": "application/json"
        }

    def read_pos_mapping(self, file_path):
        """Load the part-of-speech list and index it by each entry's ``id``.

        :param file_path: path of a JSON file containing a list of objects.
        :returns: dict mapping ``pos['id']`` to the whole ``pos`` object.
        """
        with open(file_path, 'r') as pos_file:
            pos_list = json.load(pos_file)
        return {pos['id']: pos for pos in pos_list}

    def read_gql_queries(self, dir):
        """Read every query file in ``dir``.

        Sub-directories and hidden files (for example ``.DS_Store``) are
        skipped: they are not query documents, and opening a directory would
        raise.

        :param dir: directory to scan (not recursive).
        :returns: dict mapping each file name without its extension to the
            file's text.
        """
        gql_queries = {}
        for filename in os.listdir(dir):
            file_path = os.path.join(dir, filename)
            if filename.startswith('.') or not os.path.isfile(file_path):
                continue
            # get filename without ext
            key = os.path.splitext(filename)[0]
            with open(file_path, 'r') as query_file:
                gql_queries[key] = query_file.read()
        return gql_queries

    def build_search_query_params(self, query):
        """Return the JSON payload for the ``SEARCH_QUERY`` operation."""
        return {
            "operationName": "SEARCH_QUERY",
            "variables": {
                "query": query
            },
            "query": self.gql_queries[PowerThesaurus.GQL_SEARCH_QUERY]
        }

    def build_thesaurus_query_params(self, term_id, query_type):
        """Return the JSON payload for the ``THESAURUSES_QUERY`` operation.

        :param term_id: id of the term whose related words are requested.
        :param query_type: list name; sent upper-cased as the ``list``
            variable (for example ``'synonyms'`` becomes ``'SYNONYMS'``).
        """
        return {
            "operationName": "THESAURUSES_QUERY",
            "variables": {
                "list": query_type.upper(),
                "termID": term_id,
                "sort": {
                    "field": "RATING",
                    "direction": "DESC"
                },
                "limit": 50,
                "syllables": None,
                "query": None,
                "posID": None,
                "first": 50,
                "after": ""
            },
            "query": self.gql_queries[PowerThesaurus.GQL_THESAURUS_QUERY]
        }

    def parse_thesaurus_query_response(self, response):
        """Flatten a ``THESAURUSES_QUERY`` response into a list of dicts.

        Real lists are built (not ``map`` objects) so the result can be
        indexed, measured and iterated more than once on Python 3 as well.

        :param response: decoded JSON body of the response.
        :returns: one dict per edge, in response order.
        :raises KeyError: if an expected key is missing, or a part-of-speech
            id is not present in ``self.pos_mapping``.
        """
        results = []
        for edge in response['data']['thesauruses']['edges']:
            node = edge['node']
            target = node['targetTerm']
            relations = node['relations']
            results.append({
                'id': target['id'],
                'word': target['name'],
                'slug': target['slug'],
                'parts_of_speech': [
                    self.pos_mapping[p]['shorter']
                    for p in relations['parts_of_speech']
                ],
                'tags': relations['tags'],
                'synonyms_count': target['counters']['synonyms'],
                'antonyms_count': target['counters']['antonyms'],
                'rating': node['rating'],
                'url_synonyms': self.build_url(target['slug'], 'synonyms'),
                'url_antonyms': self.build_url(target['slug'], 'antonyms')
            })
        return results

    def thesaurus_query(self, term_id, query_type):
        """Fetch the words related to ``term_id`` for the given list type.

        :returns: parsed results, or ``[]`` without any request when
            ``term_id`` is falsy.
        :raises requests.exceptions.RequestException: on timeout, connection
            failure, or a non-2xx status (via ``raise_for_status``).
        """
        if not term_id:
            return []
        params = self.build_thesaurus_query_params(term_id, query_type)
        r = requests.post(self.api_url, json=params, headers=self.request_headers,
                          timeout=self.REQUEST_TIMEOUT)
        self.logger.debug('thesaurus_query: {} {}'.format(r.status_code, r.url))
        r.raise_for_status()
        return self.parse_thesaurus_query_response(r.json())

    def parse_search_query_response(self, response):
        """Convert a ``SEARCH_QUERY`` response into ``[{'id', 'word'}, ...]``.

        Returns a real list so callers can index it and test it for
        emptiness on Python 3 as well.
        """
        return [
            {'id': t['id'], 'word': t['name']}
            for t in response['data']['search']['terms']
        ]

    def search_query(self, query):
        """Run ``SEARCH_QUERY`` for ``query`` and return the parsed terms.

        :raises requests.exceptions.RequestException: on timeout, connection
            failure, or a non-2xx status (via ``raise_for_status``).
        """
        params = self.build_search_query_params(query)
        r = requests.post(self.api_url, json=params, headers=self.request_headers,
                          timeout=self.REQUEST_TIMEOUT)
        self.logger.debug('search_query: {} {}'.format(r.status_code, r.url))
        r.raise_for_status()
        return self.parse_search_query_response(r.json())

    def search_query_match(self, query):
        """Return the first search result if its word equals ``query``.

        The comparison is exact (case-sensitive). Returns ``None`` when there
        are no results or the first one is a different word.
        """
        terms = self.search_query(query)
        if terms and terms[0]['word'] == query:
            return terms[0]
        return None
8 changes: 8 additions & 0 deletions src/gql_queries/search_query.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# SEARCH_QUERY: calls the `search` field with the required $query string and
# selects the id and name of each entry in `terms`.
query SEARCH_QUERY($query: String!) {
search(query: $query) {
terms {
id
name
}
}
}
39 changes: 39 additions & 0 deletions src/gql_queries/thesaurus_query.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# THESAURUSES_QUERY: calls the `thesauruses` field for the term $termID.
# $termID, $list and $sort are required; the remaining variables are optional
# and are forwarded as field arguments ($tagID -> tagId, $posID -> partOfSpeechId).
# $after/$first/$before/$last are forwarded alongside the pageInfo and
# edges/cursor selections below.
# NOTE(review): the Python caller in this commit also sends `limit` and `query`
# variables, which are not declared here — confirm the server ignores them.
query THESAURUSES_QUERY($after: String, $first: Int, $before: String, $last: Int, $termID: ID!, $list: List!, $sort: ThesaurusSorting!, $tagID: Int, $posID: Int, $syllables: Int) {
thesauruses(termId: $termID, sort: $sort, list: $list, after: $after, first: $first, before: $before, last: $last, tagId: $tagID, partOfSpeechId: $posID, syllables: $syllables) {
totalCount
pageInfo {
hasNextPage
hasPreviousPage
startCursor
endCursor
__typename
}
edges {
node {
_type
id
isPinned
# The related word itself; the Python parser reads id, name, slug and counters.
targetTerm {
id
name
slug
counters
isFavorite
__typename
}
# The Python parser reads `parts_of_speech` and `tags` out of `relations`.
relations
rating
vote {
voteType
id
__typename
}
votes
__typename
}
cursor
__typename
}
__typename
}
}
Loading

0 comments on commit 5095b52

Please sign in to comment.