diff --git a/.github/workflows/erlang.yml b/.github/workflows/erlang.yml index b557407..5634586 100644 --- a/.github/workflows/erlang.yml +++ b/.github/workflows/erlang.yml @@ -12,6 +12,7 @@ jobs: strategy: matrix: otp: + - '26.1.2' - '25.2.1' - '24.3.4' - '23.3.4' diff --git a/src/robots.erl b/src/robots.erl index 116f0c1..17d6669 100644 --- a/src/robots.erl +++ b/src/robots.erl @@ -134,7 +134,7 @@ handle_line(Line) -> sort_rules(_, Value = {allowed, all}) -> Value; sort_rules(_, {Allowed, Disallowed}) -> - Compare = fun(R1, R2) -> not (R1 =< R2) end, + Compare = fun(R1, R2) -> R1 > R2 end, {lists:sort(Compare, Allowed), lists:sort(Compare, Disallowed)}; sort_rules(sitemap, Value) -> Value. @@ -143,17 +143,23 @@ sort_rules(sitemap, Value) -> trim(String) -> string:trim(String, both). --spec build_rules({binary(), binary()}, {[agent()], boolean(), rules_index()}) -> - {[agent()], boolean(), rules_index()}. +-spec build_rules({binary(), binary()}, {[agent()], IsFirstAgent, rules_index()}) -> + {[agent()], IsFirstAgent, rules_index()} +when + IsFirstAgent :: boolean(). 
build_rules({<<"user-agent">>, RawAgent}, {Agents, false, RulesIndex}) -> Reversed = to_agent(RawAgent), {[Reversed | Agents], false, RulesIndex}; build_rules({<<"user-agent">>, RawAgent}, {_Agents, true, RulesIndex}) -> Reversed = to_agent(RawAgent), {[Reversed], false, RulesIndex}; +build_rules({<<"allow">>, <<>>}, {Agents, _, RulesIndex}) -> + {Agents, true, RulesIndex}; build_rules({<<"allow">>, Rule}, {Agents, _, RulesIndex}) -> {_, UpdatedIndex} = lists:foldl(fun update_index/2, {{allowed, Rule}, RulesIndex}, Agents), {Agents, true, UpdatedIndex}; +build_rules({<<"disallow">>, <<>>}, {Agents, _, RulesIndex}) -> + {Agents, true, RulesIndex}; build_rules({<<"disallow">>, Rule}, {Agents, _, RulesIndex}) -> {_, UpdatedIndex} = lists:foldl(fun update_index/2, {{disallowed, Rule}, RulesIndex}, Agents), {Agents, true, UpdatedIndex}; diff --git a/test/robots_SUITE.erl b/test/robots_SUITE.erl index c024658..9b1a93f 100644 --- a/test/robots_SUITE.erl +++ b/test/robots_SUITE.erl @@ -21,6 +21,14 @@ -define(ANOTHER_RULE, <<"/bar">>). -define(A_VALID_CODE, 200). -define(A_VALID_CONTENT, <<"User-Agent: ", ?USER_AGENT/binary, "\nAllow: ", ?A_RULE/binary>>). +-define(SOME_CONTENT_WITH_EMPTY_RULES, << + "# START YOAST BLOCK\n" + "# ---------------------------\n" + "User-agent: *\n" + "Disallow:\n" + "# ---------------------------\n" + "# END YOAST BLOCK\n" +>>). -define(ANOTHER_VALID_CONTENT, <<"User-Agent: ", ?USER_AGENT/binary, "\nAllow: ", ?A_RULE/binary, "\nDisallow: ", ?ANOTHER_RULE/binary>> @@ -58,7 +66,8 @@ groups() -> match_independently_of_the_casing_of_the_agent, return_false_if_agent_is_disallowed, return_true_if_no_matching_rules_can_be_found, - return_true_if_everything_is_allowed_for_the_corresponding_agent + return_true_if_everything_is_allowed_for_the_corresponding_agent, + ignore_empty_rules ]} ]. @@ -240,6 +249,16 @@ return_true_if_everything_is_allowed_for_the_corresponding_agent(_Config) -> ?assert(robots:is_allowed(?USER_AGENT, ?AN_URL, RulesIndex)). 
+ignore_empty_rules() -> + [ + {doc, + "Given a robots.txt with a wildcard user-agent associated with empty rules, when parsing, then allow everything."} + ]. +ignore_empty_rules(_Config) -> + {ok, RulesIndex} = robots:parse(?SOME_CONTENT_WITH_EMPTY_RULES, ?A_VALID_CODE), + + ?assert(robots:is_allowed(?USER_AGENT, ?AN_URL, RulesIndex)). + %%%=================================================================== %%% Internal functions %%%===================================================================