diff --git a/.gitattributes b/.gitattributes index df85177..d5bc171 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,5 +1,7 @@ * text=auto +*.html linguist-language=liquid *.css linguist-detectable=false -*.html linguist-detectable=false -*.js linguist-detectable=false \ No newline at end of file +*.js linguist-detectable=false + +assets/* linguist-detectable=false \ No newline at end of file diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 9b6666c..ee84783 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -32,10 +32,8 @@ jobs: - name: Checkout uses: actions/checkout@v4 with: - ref: ${{ inputs.ref || github.event.client_payload.ref || env.GITHUB_REF_NAME }} + ref: ${{ inputs.ref || github.event.client_payload.ref || env.GITHUB_REF_NAME }} - run: ./scripts/build-data.sh --remote - - run: | - ./scripts/group.sh > ./_pages/actvity.md - uses: ruby/setup-ruby@v1 with: ruby-version: 3.2 diff --git a/README.md b/README.md index 0b512cb..8190433 100644 --- a/README.md +++ b/README.md @@ -3,16 +3,51 @@ ```sh ./scripts/build-data.sh --out _data ./scripts/build-data.sh --url https://url/to/annotations.json -./scripts/group.sh > _pages/activity.md + +# locally +./scripts/build-data.sh annotations.json ``` ```sh - bundle install # or bundle install --local bundle exec jekyll build bundle exec jekyll serve ``` +## Building _data + +```sh +# create _data/books.json +jq -L scripts 'include "annotations"; create_book_data' annotations.json + +# create _data/activity.json +jq -L scripts 'include "annotations"; create_activity_data' annotations.json + +# create _data/genres.json +jq -L scripts 'include "annotations"; create_genre_data' annotations.json + +# create _data/words.json +jq -L scripts 'include "annotations"; create_word_data' annotations.json + +# create _data/stats/bookmarks_per_month.json +jq -L scripts 'include "annotations"; stats_bookmarks_per_month' activity.json + +# create _data/stats/month.json +jq -L 
scripts 'include "annotations"; stats_month' activity.json + +# create _data/history.json +jq -L scripts 'include "annotations"; stats_history' activity.json +``` + +## Build activity + +create _pages/activity.txt + +```sh +jq -L scripts --slurpfile books _data/books.json \ + 'include "annotations"; stats_history_text' _data/history.json +``` + # trigger update ```sh @@ -21,5 +56,5 @@ curl -H "Accept: application/vnd.github.everest-preview+json" \ --request POST \ -d '{"event_type":"update_data","client_payload": {"data_url": "URL HERE", "ref": "stage" }}' \ https://api.github.com/repos/nntrn/what-im-reading/dispatches +``` -``` \ No newline at end of file diff --git a/_config.yml b/_config.yml index 6d807b7..9e32a30 100644 --- a/_config.yml +++ b/_config.yml @@ -1,5 +1,5 @@ title: what i'm reading -description: Bookmarks from Apple Books +description: saved excerpts of things author: Annie Tran url: https://nntrn.github.io baseurl: /what-im-reading diff --git a/_pages/tags.html b/_pages/index.html similarity index 100% rename from _pages/tags.html rename to _pages/index.html diff --git a/scripts/annotations.jq b/scripts/annotations.jq index 3bdb504..0ae2aa2 100644 --- a/scripts/annotations.jq +++ b/scripts/annotations.jq @@ -1,27 +1,10 @@ include "books"; +def stop: { + all: ["able","about","across","after","all","almost","also","among","and","any","are","because","been","but","can","cannot","could","dear","did","does","either","else","ever","every","for","from","get","got","had","has","have","her","hers","him","his","how","however","into","its","just","least","let","like","likely","may","might","most","must","neither","nor","not","off","often","only","other","our","own","rather","said","say","says","she","should","since","some","than","that","the","their","them","then","there","these","they","this","tis","too","twas","wants","was","were","what","when","where","which","while","who","whom","why","will","with","would","yet","you","your"], -# jq -L scripts 'include 
"annotations"; create_book_data' annotations.json -# jq -L scripts 'include "annotations"; create_activity_data' annotations.json -# jq -L scripts 'include "annotations"; create_word_data' annotations.json - -# jq -L scripts 'include "annotations"; stats_bookmarks_per_month' activity.json -# jq -L scripts 'include "annotations"; stats_month' activity.json - -def unsmart: . | gsub("[“”]";"\"") | gsub("[’‘]";"'"); - -def remove_cites: - gsub("\\[([0-9xiv]+)\\]";"";"m") | gsub("(?<q>[a-zA-Z][“”\"\\.]*)[0-9]+";.q); - -def format_quotes: - gsub("^[\\n ]+|[ \\n]+$";"") - | split("\n") - | map( - select(test("[a-zA-Z0-9]")) - | gsub("[\\t]+"; "\t") - | if length > 400 then gsub("(?<=[\\S\\s]{300,500}\\.) "; "

") else . end - ) - ; + compact: ["able","about","across","after","almost","also","among","because","been","cannot","could","dear","does","either","else","ever","every","from","have","hers","however","into","just","least","like","likely","might","most","must","neither","often","only","other","rather","said","says","should","since","some","than","that","their","them","then","there","these","they","this","twas","wants","were","what","when","where","which","while","whom","will","with","would","your"] +}; # functions using annotations.json ########################################### @@ -60,14 +43,18 @@ def create_word_data: Z_PK, ZASSETID, words: (.ZANNOTATIONSELECTEDTEXT + | ascii_downcase | gsub("[.?!] (?<a>[A-Z])"; (.a|ascii_downcase);"x") - | gsub("\([39]|implode)"; "") - | gsub("[^a-zA-Z]+";" ";"x")|split(" ") - | map(select(length >4))|sort|group_by(.) - | map([.[0],length]) + | gsub("[[:punct:]] "; " ") + | gsub("[^a-zA-Z]+";" ";"x") + | split(" ") + | map(select(length >4)) + | sort + | group_by(.) + | map([(.[0]|ascii_downcase),length]) | sort_by(.[1]) | map(join("-")) - | reverse) + | reverse|join(" ")) }); def create_genre_data: @@ -80,6 +67,7 @@ def create_genre_data: }); # functions using activity.json ############################################### + def stats_bookmarks_per_month: sort_by(.created) | map(. 
+ {groupby_label: (.created|fromdate|strftime("%b %Y"))}) @@ -95,4 +83,65 @@ def stats_month: saved_count: length, saved: (group_by(.assetid)|map({(.[0].assetid): length})|add) } - }) | add; \ No newline at end of file + }) | add; + +def stats_history: + map((.created|strptime("%Y-%m-%dT%H:%M:%SZ")|mktime) as $dt | + { + id,assetid,created, + date: $dt, + ymd: ($dt|strftime("%Y-%m-%d")), + month: ($dt|strftime("%b %Y")) + }) + | group_by(.month) + | map({ + month: .[0].month, + sortdt: (.[0].date|strftime("%Y %m")), + month_count: length, + activity: ( + group_by(.ymd) + | reverse + | map({ + date: .[0].ymd, + date_count: length, + books: (map(.assetid)|group_by(.) | map({ assetid: .[0], book_count: length })) + }) + ) + }) + | sort_by(.sortdt) + | reverse; + +# jq -r -L scripts --slurpfile books _data/books.json 'include "annotations"; stats_history_text' _data/history.json +def stats_history_text: + if ($books|length) > 0 then + ( [ + # title + "Reading activity", "="*16, + "", + "*** denotes date when book was started", + "", + # create markdown file showing activity from _data/history.json + # index $books to get date book was started + ($books[] | map(. 
+ {cdate: (.created|strptime("%Y-%m-%dT%H:%M:%SZ")|mktime|strftime("%Y-%m-%d"))}) |INDEX(.[];.assetid)) as $asset | map([ + .month, + ("-" * (.month|length)), "", + (.activity | map( .date as $d2 | [ + " * **\(.date)**", "", + ( + .books | map((if .book_count > 1 then "annotations" else "annotation" end) as $noun + | (if $asset[.assetid].cdate == $d2 then " ***" else "" end) as $starttext + | " - [\($asset[.assetid].title|gsub("[\\[\\]]";""))][\(.assetid)] - \(.book_count) \($noun)\($starttext)" + ) + ), + "" + ]) + ), "" + ]), + # create link reference from _data/books.json + ($books[]|sort_by(.permalink)|map("[\(.assetid)]:\t\(.permalink)")) + ] + | flatten | join("\n") ) + else + error("path to _data/books.json was not passed") + end +; \ No newline at end of file diff --git a/scripts/books.jq b/scripts/books.jq index 8e36aa2..5a304fa 100644 --- a/scripts/books.jq +++ b/scripts/books.jq @@ -1,3 +1,8 @@ +# def stopwords: { +# all: ["able","about","across","after","all","almost","also","among","and","any","are","because","been","but","can","cannot","could","dear","did","does","either","else","ever","every","for","from","get","got","had","has","have","her","hers","him","his","how","however","into","its","just","least","let","like","likely","may","might","most","must","neither","nor","not","off","often","only","other","our","own","rather","said","say","says","she","should","since","some","than","that","the","their","them","then","there","these","they","this","tis","too","twas","wants","was","were","what","when","where","which","while","who","whom","why","will","with","would","yet","you","your"], +# compact: 
["able","about","across","after","almost","also","among","because","been","cannot","could","dear","does","either","else","ever","every","from","have","hers","however","into","just","least","like","likely","might","most","must","neither","often","only","other","rather","said","says","should","since","some","than","that","their","them","then","there","these","they","this","twas","wants","were","what","when","where","which","while","whom","will","with","would","your"] +# } + def squo: [39]|implode; def lpad(n): tostring | if (n > length) then ((n - length) * "0") + . else . end; def squote($text): [squo,$text,squo]|join(""); @@ -22,13 +27,6 @@ def epublocation($cfi): | split("-")|.[1:] | map(select(length < 5)|tonumber); -def remove_citations($text): - $text - | gsub("(?<a>[^0-9\\$,]{3})[0-9]{1,2}(?<b>[^0-9%\\.,]{2})"; .a +.b; "x") - | gsub("(?<a>[^0-9]{3})[0-9]{1,2}([\\s]+)?$"; .a +.b; "xs"); - -def remove_citations: remove_citations(.); - def split_long_title($text): $text | split("\\s?[(:)]\\s?";"x") @@ -52,6 +50,19 @@ def get_author_slug($s): | ascii_downcase ; +def remove_cites: + gsub("\\[([0-9xiv]+)\\]";"";"m") | gsub("(?<q>[a-zA-Z][“”\"\\.]*)[0-9]+";.q); + +def format_quotes: + gsub("^[\\n ]+|[ \\n]+$";"") + | split("\n") + | map( + select(test("[a-zA-Z0-9]")) + | gsub("[\\t]+"; "\t") + | if length > 400 then gsub("(?<=[\\S\\s]{300,500}\\.) "; "\n\n") else . end + ) + ; + def identify_subsection: . 
as $string | if test("[cC][onclusi]{2,}") then "Conclusion" diff --git a/scripts/build-data.sh b/scripts/build-data.sh index c1489c3..bf13ecb 100755 --- a/scripts/build-data.sh +++ b/scripts/build-data.sh @@ -1,14 +1,15 @@ #!/usr/bin/env bash + set -e SCRIPT="$(realpath "$0")" DIR=${SCRIPT%/*/*} -SCRIPTDIR=${SCRIPT%/*} +_scriptdir=${SCRIPT%/*} DEFAULT_REMOTE_URL="https://raw.githubusercontent.com/nntrn/bookstand/assets/annotations.json" DEFAULT_ANNOTATIONS_FILE=annotations.json -export OUTDIR=$DIR/_data +export OUTDIR=$DIR export ANNOTATIONS_FILE=$DEFAULT_ANNOTATIONS_FILE export FETCH_REMOTE=0 export REMOTE_URL="$DEFAULT_REMOTE_URL" @@ -16,21 +17,36 @@ export RUN_ONLY= _log() { echo -e "\033[0;${2:-33}m$1\033[0m" 3>&2 2>&1 >&3 3>&-; } -call_jq_func() { +set_remote() { + FETCH_REMOTE=1 + if [[ $1 == "https://"* ]]; then + REMOTE_URL="$1" + elif [[ -n $1 ]]; then + _log "Did not set $1" + fi +} + +jq_from_annotations() { local func=$1 - local source=${2:-$ANNOTATIONS_FILE} - local logcolor=${3:-35} - _log "* Creating $func" $logcolor - cat $source | jq -L $SCRIPTDIR "include \"annotations\"; $func" + _log "* Running $func" 36 + jq -L $_scriptdir "include \"annotations\"; $func" $ANNOTATIONS_FILE } -call_jq_stat_func() { call_jq_func $1 $OUTDIR/activity.json 36; } +jq_from_activity() { + local func=$1 + _log "* Running $func" 35 + jq -L $_scriptdir "include \"annotations\"; $func" $DATADIR/activity.json +} get_annotations() { - if [[ ! -f $ANNOTATIONS_FILE ]] || [[ ! -s $ANNOTATIONS_FILE ]] || [[ $FETCH_REMOTE -eq 1 ]]; then - _log "Fetching annotations from $REMOTE_URL" 37 - curl -s -o $ANNOTATIONS_FILE "${REMOTE_URL:-$DEFAULT_REMOTE_URL}" + + if [[ ! $REMOTE_URL == "https://"* ]]; then + REMOTE_URL=$DEFAULT_REMOTE_URL fi + + _log "Fetching annotations from $REMOTE_URL" 37 + curl -s -o $ANNOTATIONS_FILE "${REMOTE_URL}" + return $? 
} @@ -38,43 +54,54 @@ while true; do case $1 in -o | --out) OUTDIR="$2" && shift ;; -R | --run-only) RUN_ONLY="$2" && shift ;; - -u | --url) - if [[ $2 == "https://"* ]]; then - REMOTE_URL="$2" - FETCH_REMOTE=1 - shift - fi - ;; - -r | --remote) FETCH_REMOTE=1 ;; - -f | --force) FETCH_REMOTE=1 ;; - https*.json) REMOTE_URL="$1" ;; + -u | --url) set_remote "$2" && shift ;; + -r | --remote) set_remote ;; + https*.json) set_remote "$1" ;; *.json) ANNOTATIONS_FILE="$1" ;; esac shift || break done -mkdir -p $OUTDIR/stats +export DATADIR=$OUTDIR/_data -get_annotations & -wait %1 +_main() { -if [[ -n $RUN_ONLY ]]; then - _log "Run only: $RUN_ONLY" - if [[ $RUN_ONLY == "stat_"* ]]; then - call_jq_stat_func $RUN_ONLY - else - call_jq_func $RUN_ONLY + if [[ ! -f $ANNOTATIONS_FILE ]] || + [[ ! -s $ANNOTATIONS_FILE ]] || + [[ $FETCH_REMOTE -eq 1 ]]; then + + get_annotations & + wait %1 fi -else + if [[ -n $RUN_ONLY ]]; then + if [[ $RUN_ONLY == "stat_"* ]]; then + jq_from_activity $RUN_ONLY + else + jq_from_annotations $RUN_ONLY + fi + else + + mkdir -p $OUTDIR/{_data/stats,_pages} - call_jq_func create_book_data >$OUTDIR/books.json & - call_jq_func create_genre_data >$OUTDIR/genres.json & - call_jq_func create_activity_data >$OUTDIR/activity.json & - wait %3 + jq_from_annotations create_book_data >$DATADIR/books.json & + jq_from_annotations create_genre_data >$DATADIR/genres.json & + jq_from_annotations create_activity_data >$DATADIR/activity.json & + # jq_from_annotations create_word_data >$DATADIR/words.json & + wait %3 - call_jq_stat_func stats_bookmarks_per_month >$OUTDIR/stats/bookmarks_per_month.json & - call_jq_stat_func stats_month >$OUTDIR/stats/month.json & - wait %2 + jq_from_activity stats_bookmarks_per_month >$DATADIR/stats/bookmarks_per_month.json & + jq_from_activity stats_month >$DATADIR/stats/month.json & + jq_from_activity stats_history >$DATADIR/history.json & + wait %3 + + _log "* Creating _pages/activity.txt" + jq -r -L $_scriptdir \ + --slurpfile books 
$DATADIR/books.json \ + 'include "annotations"; stats_history_text' $DATADIR/history.json 1>$OUTDIR/_pages/activity.txt & + wait %1 + + fi +} -fi +_main diff --git a/scripts/group.sh b/scripts/group.sh deleted file mode 100755 index def5ffc..0000000 --- a/scripts/group.sh +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env bash -set -e - -SCRIPT="$(realpath "$0")" -DIR=${SCRIPT%/*/*} - -DATADIR=$DIR/_data -ASSETSDIR=$DIR/assets/data -HISTORYDATAPATH=$ASSETSDIR/history.json - -mkdir -p $ASSETSDIR - -jq 'map( - (.created|strptime("%Y-%m-%dT%H:%M:%SZ")|mktime) as $dt - | { - id,assetid,created, - date:$dt, - ymd: ($dt|strftime("%Y-%m-%d")), month: ($dt|strftime("%b %Y")) - } - ) - | group_by(.month) - | map({ - month: .[0].month, - sortdt: (.[0].date|strftime("%Y %m")), - count: length, - dates: ( - map({assetid, date, ymd}) - | group_by(.ymd) - | map({ - date: .[0].ymd, - books: (map(.assetid)|group_by(.)|map({ assetid: .[0], annotation_count: length })) - }) - | reverse - ) - }) - | sort_by(.sortdt)|reverse' $DATADIR/activity.json >$HISTORYDATAPATH - -jq -n --unbuffered env &>/dev/null - -echo '--- -title: activity -layout: default -permalink: /activity ---- - - -' - -cat $HISTORYDATAPATH | - jq -r --slurpfile books $DATADIR/books.json ' - ($books[] | - map(. + {cdate: (.created|strptime("%Y-%m-%dT%H:%M:%SZ")|mktime|strftime("%Y-%m-%d"))}) | - INDEX(.[];.assetid) - ) as $asset - | map( - [ - "## " +.month, - "", - (.dates - | map( - .date as $d2 | - [ - "* **\(.date)**" , - "", - (.books | map(" - " + (if $asset[.assetid].cdate == $d2 then "Started " else "" end) + "[" + $asset[.assetid].title + "][\(.assetid)] - \(.annotation_count) annotation\(if .annotation_count > 1 then "s" else "" end)") |join("\n")), - "" - ] |flatten(2) - )), - "" - ] - | flatten - | join("\n") - ) -| join("\n")' - -jq -r 'sort_by(.permalink)|map("[\(.assetid)]:\t\(.permalink)")|join("\n")' $DATADIR/books.json